-rw-r--r--  CREDITS | 2
-rw-r--r--  Documentation/RCU/trace.txt | 144
-rw-r--r--  Documentation/accounting/getdelays.c | 1
-rw-r--r--  Documentation/dontdiff | 26
-rw-r--r--  Documentation/filesystems/Locking | 212
-rw-r--r--  Documentation/kernel-docs.txt | 27
-rw-r--r--  Documentation/kernel-parameters.txt | 23
-rw-r--r--  Documentation/scsi/scsi_mid_low_api.txt | 59
-rw-r--r--  Documentation/trace/events-power.txt | 90
-rw-r--r--  Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 11
-rw-r--r--  Documentation/x86/boot.txt | 1
-rw-r--r--  MAINTAINERS | 39
-rw-r--r--  Makefile | 2
-rw-r--r--  arch/Kconfig | 3
-rw-r--r--  arch/alpha/include/asm/perf_event.h | 6
-rw-r--r--  arch/alpha/kernel/irq_alpha.c | 2
-rw-r--r--  arch/alpha/kernel/perf_event.c | 11
-rw-r--r--  arch/arm/common/it8152.c | 1
-rw-r--r--  arch/arm/include/asm/hardware/it8152.h | 1
-rw-r--r--  arch/arm/include/asm/highmem.h | 3
-rw-r--r--  arch/arm/include/asm/sizes.h | 6
-rw-r--r--  arch/arm/include/asm/system.h | 1
-rw-r--r--  arch/arm/kernel/entry-common.S | 6
-rw-r--r--  arch/arm/kernel/perf_event.c | 4
-rw-r--r--  arch/arm/kernel/smp.c | 1
-rw-r--r--  arch/arm/mach-at91/include/mach/at91_mci.h | 2
-rw-r--r--  arch/arm/mach-ixp4xx/common-pci.c | 2
-rw-r--r--  arch/arm/mach-pxa/Kconfig | 1
-rw-r--r--  arch/arm/mach-pxa/sleep.S | 4
-rw-r--r--  arch/arm/mm/cache-feroceon-l2.c | 37
-rw-r--r--  arch/arm/mm/cache-xsc3l2.c | 57
-rw-r--r--  arch/arm/mm/dma-mapping.c | 7
-rw-r--r--  arch/arm/mm/flush.c | 7
-rw-r--r--  arch/arm/mm/highmem.c | 87
-rw-r--r--  arch/mips/kernel/perf_event_mipsxx.c | 2
-rw-r--r--  arch/mn10300/kernel/irq.c | 2
-rw-r--r--  arch/powerpc/kernel/e500-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/mpc7450-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 2
-rw-r--r--  arch/powerpc/kernel/perf_event_fsl_emb.c | 2
-rw-r--r--  arch/powerpc/kernel/power4-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/power7-pmu.c | 2
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c | 2
-rw-r--r--  arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 1
-rw-r--r--  arch/s390/Kconfig | 1
-rw-r--r--  arch/s390/include/asm/mutex.h | 2
-rw-r--r--  arch/sh/boards/mach-se/7206/irq.c | 2
-rw-r--r--  arch/sh/kernel/cpu/sh2a/clock-sh7201.c | 2
-rw-r--r--  arch/sh/kernel/cpu/sh4/clock-sh4-202.c | 3
-rw-r--r--  arch/sh/kernel/cpu/sh4/perf_event.c | 2
-rw-r--r--  arch/sh/kernel/cpu/sh4a/perf_event.c | 2
-rw-r--r--  arch/sh/kernel/perf_event.c | 2
-rw-r--r--  arch/sparc/include/asm/perf_event.h | 4
-rw-r--r--  arch/sparc/kernel/nmi.c | 2
-rw-r--r--  arch/sparc/kernel/perf_event.c | 9
-rw-r--r--  arch/x86/Kconfig | 41
-rw-r--r--  arch/x86/Kconfig.debug | 11
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 2
-rw-r--r--  arch/x86/include/asm/alternative.h | 7
-rw-r--r--  arch/x86/include/asm/amd_nb.h | 49
-rw-r--r--  arch/x86/include/asm/apic.h | 1
-rw-r--r--  arch/x86/include/asm/apicdef.h | 1
-rw-r--r--  arch/x86/include/asm/bootparam.h | 1
-rw-r--r--  arch/x86/include/asm/fixmap.h | 4
-rw-r--r--  arch/x86/include/asm/io_apic.h | 8
-rw-r--r--  arch/x86/include/asm/irq.h | 4
-rw-r--r--  arch/x86/include/asm/kdebug.h | 2
-rw-r--r--  arch/x86/include/asm/microcode.h | 6
-rw-r--r--  arch/x86/include/asm/mpspec.h | 31
-rw-r--r--  arch/x86/include/asm/mpspec_def.h | 7
-rw-r--r--  arch/x86/include/asm/mrst-vrtc.h | 9
-rw-r--r--  arch/x86/include/asm/mrst.h | 14
-rw-r--r--  arch/x86/include/asm/msr-index.h | 4
-rw-r--r--  arch/x86/include/asm/nmi.h | 53
-rw-r--r--  arch/x86/include/asm/pci.h | 1
-rw-r--r--  arch/x86/include/asm/perf_event.h | 2
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 63
-rw-r--r--  arch/x86/include/asm/setup.h | 6
-rw-r--r--  arch/x86/include/asm/smpboot_hooks.h | 1
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 33
-rw-r--r--  arch/x86/include/asm/timer.h | 6
-rw-r--r--  arch/x86/kernel/Makefile | 1
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 11
-rw-r--r--  arch/x86/kernel/alternative.c | 49
-rw-r--r--  arch/x86/kernel/amd_nb.c | 135
-rw-r--r--  arch/x86/kernel/apb_timer.c | 1
-rw-r--r--  arch/x86/kernel/aperture_64.c | 10
-rw-r--r--  arch/x86/kernel/apic/Makefile | 5
-rw-r--r--  arch/x86/kernel/apic/apic.c | 119
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c | 36
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 83
-rw-r--r--  arch/x86/kernel/apic/nmi.c | 567
-rw-r--r--  arch/x86/kernel/cpu/common.c | 1
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 147
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 135
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 74
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 16
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 26
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 644
-rw-r--r--  arch/x86/kernel/dumpstack.c | 12
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 25
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 24
-rw-r--r--  arch/x86/kernel/early_printk.c | 3
-rw-r--r--  arch/x86/kernel/ftrace.c | 3
-rw-r--r--  arch/x86/kernel/head32.c | 3
-rw-r--r--  arch/x86/kernel/head_32.S | 6
-rw-r--r--  arch/x86/kernel/kprobes.c | 113
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 34
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 16
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 34
-rw-r--r--  arch/x86/kernel/process.c | 10
-rw-r--r--  arch/x86/kernel/process_32.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 2
-rw-r--r--  arch/x86/kernel/reboot_fixups_32.c | 16
-rw-r--r--  arch/x86/kernel/setup.c | 30
-rw-r--r--  arch/x86/kernel/smpboot.c | 22
-rw-r--r--  arch/x86/kernel/stacktrace.c | 8
-rw-r--r--  arch/x86/kernel/time.c | 18
-rw-r--r--  arch/x86/kernel/trampoline_64.S | 2
-rw-r--r--  arch/x86/kernel/traps.c | 35
-rw-r--r--  arch/x86/kernel/tsc.c | 96
-rw-r--r--  arch/x86/kernel/verify_cpu.S (renamed from arch/x86/kernel/verify_cpu_64.S) | 49
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 8
-rw-r--r--  arch/x86/kvm/i8259.c | 2
-rw-r--r--  arch/x86/kvm/mmu.c | 3
-rw-r--r--  arch/x86/mm/Makefile | 2
-rw-r--r--  arch/x86/mm/amdtopology_64.c (renamed from arch/x86/mm/k8topology_64.c) | 12
-rw-r--r--  arch/x86/mm/init.c | 3
-rw-r--r--  arch/x86/mm/init_32.c | 20
-rw-r--r--  arch/x86/mm/kmemcheck/error.c | 2
-rw-r--r--  arch/x86/mm/numa_64.c | 22
-rw-r--r--  arch/x86/mm/pageattr.c | 33
-rw-r--r--  arch/x86/mm/setup_nx.c | 2
-rw-r--r--  arch/x86/mm/srat_32.c | 1
-rw-r--r--  arch/x86/mm/srat_64.c | 10
-rw-r--r--  arch/x86/oprofile/backtrace.c | 2
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 3
-rw-r--r--  arch/x86/oprofile/nmi_timer_int.c | 3
-rw-r--r--  arch/x86/oprofile/op_model_amd.c | 79
-rw-r--r--  arch/x86/oprofile/op_model_p4.c | 2
-rw-r--r--  arch/x86/pci/Makefile | 1
-rw-r--r--  arch/x86/pci/ce4100.c | 315
-rw-r--r--  arch/x86/pci/pcbios.c | 23
-rw-r--r--  arch/x86/platform/Makefile | 2
-rw-r--r--  arch/x86/platform/ce4100/Makefile | 1
-rw-r--r--  arch/x86/platform/ce4100/ce4100.c | 132
-rw-r--r--  arch/x86/platform/iris/Makefile | 1
-rw-r--r--  arch/x86/platform/iris/iris.c | 91
-rw-r--r--  arch/x86/platform/mrst/Makefile | 2
-rw-r--r--  arch/x86/platform/mrst/early_printk_mrst.c (renamed from arch/x86/kernel/early_printk_mrst.c) | 0
-rw-r--r--  arch/x86/platform/mrst/mrst.c | 546
-rw-r--r--  arch/x86/platform/mrst/vrtc.c | 165
-rw-r--r--  arch/x86/platform/sfi/sfi.c | 4
-rw-r--r--  arch/x86/platform/visws/visws_quirks.c | 2
-rw-r--r--  drivers/acpi/acpica/evgpeinit.c | 3
-rw-r--r--  drivers/acpi/acpica/nsinit.c | 2
-rw-r--r--  drivers/acpi/battery.c | 5
-rw-r--r--  drivers/acpi/numa.c | 14
-rw-r--r--  drivers/acpi/scan.c | 97
-rw-r--r--  drivers/ata/Kconfig | 22
-rw-r--r--  drivers/ata/Makefile | 2
-rw-r--r--  drivers/ata/libata-core.c | 24
-rw-r--r--  drivers/ata/libata-eh.c | 17
-rw-r--r--  drivers/ata/libata-sff.c | 7
-rw-r--r--  drivers/ata/pata_cs5536.c | 20
-rw-r--r--  drivers/atm/atmtcp.c | 5
-rw-r--r--  drivers/bluetooth/hci_ldisc.c | 6
-rw-r--r--  drivers/char/agp/amd64-agp.c | 33
-rw-r--r--  drivers/char/agp/intel-gtt.c | 11
-rw-r--r--  drivers/char/ramoops.c | 12
-rw-r--r--  drivers/cpufreq/cpufreq.c | 1
-rw-r--r--  drivers/cpuidle/cpuidle.c | 1
-rw-r--r--  drivers/dma/mv_xor.c | 2
-rw-r--r--  drivers/edac/amd64_edac.c | 4
-rw-r--r--  drivers/gpio/cs5535-gpio.c | 19
-rw-r--r--  drivers/gpio/gpiolib.c | 3
-rw-r--r--  drivers/gpio/rdc321x-gpio.c | 2
-rw-r--r--  drivers/gpu/drm/drm_crtc_helper.c | 7
-rw-r--r--  drivers/gpu/drm/i915/dvo_ch7017.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_dma.c | 23
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h | 10
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c | 21
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c | 37
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 19
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h | 5
-rw-r--r--  drivers/gpu/drm/i915/intel_sdvo.c | 12
-rw-r--r--  drivers/gpu/drm/radeon/atombios_crtc.c | 7
-rw-r--r--  drivers/gpu/drm/radeon/evergreen.c | 27
-rw-r--r--  drivers/gpu/drm/radeon/evergreend.h | 1
-rw-r--r--  drivers/gpu/drm/radeon/r600.c | 10
-rw-r--r--  drivers/gpu/drm/radeon/r600_cs.c | 9
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c | 9
-rw-r--r--  drivers/gpu/drm/radeon/radeon_drv.c | 19
-rw-r--r--  drivers/gpu/drm/radeon/radeon_fb.c | 2
-rw-r--r--  drivers/hwmon/s3c-hwmon.c | 2
-rw-r--r--  drivers/idle/intel_idle.c | 3
-rw-r--r--  drivers/isdn/gigaset/capi.c | 1
-rw-r--r--  drivers/leds/led-class.c | 2
-rw-r--r--  drivers/media/IR/keymaps/rc-rc6-mce.c | 21
-rw-r--r--  drivers/media/IR/lirc_dev.c | 29
-rw-r--r--  drivers/media/IR/mceusb.c | 174
-rw-r--r--  drivers/media/IR/nuvoton-cir.c | 10
-rw-r--r--  drivers/media/IR/streamzap.c | 21
-rw-r--r--  drivers/media/video/cx25840/cx25840-core.c | 19
-rw-r--r--  drivers/media/video/cx88/cx88-alsa.c | 99
-rw-r--r--  drivers/media/video/cx88/cx88-cards.c | 7
-rw-r--r--  drivers/media/video/cx88/cx88-video.c | 27
-rw-r--r--  drivers/media/video/cx88/cx88.h | 6
-rw-r--r--  drivers/media/video/em28xx/em28xx-video.c | 2
-rw-r--r--  drivers/media/video/mx2_camera.c | 2
-rw-r--r--  drivers/media/video/s5p-fimc/fimc-capture.c | 51
-rw-r--r--  drivers/media/video/s5p-fimc/fimc-core.c | 54
-rw-r--r--  drivers/media/video/s5p-fimc/fimc-core.h | 24
-rw-r--r--  drivers/media/video/s5p-fimc/regs-fimc.h | 3
-rw-r--r--  drivers/media/video/sh_mobile_ceu_camera.c | 2
-rw-r--r--  drivers/media/video/soc_camera.c | 4
-rw-r--r--  drivers/media/video/wm8775.c | 104
-rw-r--r--  drivers/mfd/ab8500-core.c | 2
-rw-r--r--  drivers/mfd/wm831x-core.c | 8
-rw-r--r--  drivers/mmc/core/core.c | 1
-rw-r--r--  drivers/mmc/host/at91_mci.c | 13
-rw-r--r--  drivers/mmc/host/atmel-mci.c | 18
-rw-r--r--  drivers/net/atl1c/atl1c_main.c | 39
-rw-r--r--  drivers/net/atlx/atl1.c | 10
-rw-r--r--  drivers/net/benet/be.h | 2
-rw-r--r--  drivers/net/benet/be_cmds.c | 75
-rw-r--r--  drivers/net/benet/be_main.c | 2
-rw-r--r--  drivers/net/bonding/bond_ipv6.c | 7
-rw-r--r--  drivers/net/bonding/bond_main.c | 42
-rw-r--r--  drivers/net/bonding/bonding.h | 4
-rw-r--r--  drivers/net/cnic.c | 10
-rw-r--r--  drivers/net/ehea/ehea_ethtool.c | 7
-rw-r--r--  drivers/net/epic100.c | 4
-rw-r--r--  drivers/net/hamachi.c | 4
-rw-r--r--  drivers/net/pcmcia/axnet_cs.c | 1
-rw-r--r--  drivers/net/pcmcia/pcnet_cs.c | 1
-rw-r--r--  drivers/net/ppp_generic.c | 9
-rw-r--r--  drivers/net/skfp/skfddi.c | 2
-rw-r--r--  drivers/net/starfire.c | 2
-rw-r--r--  drivers/net/sundance.c | 4
-rw-r--r--  drivers/net/tehuti.c | 4
-rw-r--r--  drivers/net/tg3.c | 2
-rw-r--r--  drivers/net/typhoon.c | 1
-rw-r--r--  drivers/net/usb/asix.c | 4
-rw-r--r--  drivers/net/usb/mcs7830.c | 14
-rw-r--r--  drivers/net/veth.c | 4
-rw-r--r--  drivers/net/wireless/hostap/hostap_main.c | 1
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-1000.c | 2
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-6000.c | 12
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-agn-eeprom.c | 88
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-agn-lib.c | 6
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-core.h | 1
-rw-r--r--  drivers/net/wireless/iwlwifi/iwl-eeprom.h | 25
-rw-r--r--  drivers/net/wireless/libertas/cfg.c | 2
-rw-r--r--  drivers/net/wireless/p54/p54usb.c | 6
-rw-r--r--  drivers/net/wireless/rt2x00/rt2800pci.c | 1
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00.h | 1
-rw-r--r--  drivers/net/wireless/rt2x00/rt2x00dev.c | 9
-rw-r--r--  drivers/net/yellowfin.c | 4
-rw-r--r--  drivers/of/of_i2c.c | 2
-rw-r--r--  drivers/pci/hotplug/pciehp_acpi.c | 3
-rw-r--r--  drivers/platform/x86/intel_ips.c | 36
-rw-r--r--  drivers/platform/x86/intel_ips.h | 21
-rw-r--r--  drivers/platform/x86/intel_scu_ipc.c | 5
-rw-r--r--  drivers/rtc/Kconfig | 12
-rw-r--r--  drivers/rtc/Makefile | 1
-rw-r--r--  drivers/rtc/rtc-mrst.c | 582
-rw-r--r--  drivers/rtc/rtc-rs5c372.c | 2
-rw-r--r--  drivers/scsi/bfa/bfa_fcs.c | 4
-rw-r--r--  drivers/scsi/bfa/bfa_fcs_fcpim.c | 6
-rw-r--r--  drivers/scsi/bfa/bfa_fcs_lport.c | 10
-rw-r--r--  drivers/scsi/bfa/bfa_fcs_rport.c | 6
-rw-r--r--  drivers/scsi/bfa/bfa_ioc.c | 8
-rw-r--r--  drivers/scsi/bfa/bfa_svc.c | 28
-rw-r--r--  drivers/scsi/bfa/bfad.c | 8
-rw-r--r--  drivers/scsi/bfa/bfad_drv.h | 2
-rw-r--r--  drivers/scsi/bfa/bfad_im.c | 21
-rw-r--r--  drivers/sh/intc/core.c | 1
-rw-r--r--  drivers/spi/coldfire_qspi.c | 2
-rw-r--r--  drivers/spi/mpc52xx_spi.c | 2
-rw-r--r--  drivers/spi/omap2_mcspi.c | 39
-rw-r--r--  drivers/spi/spi.c | 3
-rw-r--r--  drivers/spi/spi_fsl_espi.c | 35
-rw-r--r--  drivers/staging/zram/zram_drv.c | 6
-rw-r--r--  drivers/usb/atm/ueagle-atm.c | 22
-rw-r--r--  drivers/video/backlight/cr_bllcd.c | 1
-rw-r--r--  drivers/video/fbmem.c | 2
-rw-r--r--  drivers/video/imxfb.c | 9
-rw-r--r--  drivers/video/sh_mobile_hdmi.c | 16
-rw-r--r--  drivers/video/sh_mobile_lcdcfb.c | 30
-rw-r--r--  drivers/watchdog/hpwdt.c | 11
-rw-r--r--  drivers/watchdog/rdc321x_wdt.c | 2
-rw-r--r--  fs/ext4/resize.c | 5
-rw-r--r--  fs/gfs2/bmap.c | 11
-rw-r--r--  fs/gfs2/glock.c | 71
-rw-r--r--  fs/gfs2/glock.h | 28
-rw-r--r--  fs/gfs2/glops.c | 1
-rw-r--r--  fs/gfs2/incore.h | 12
-rw-r--r--  fs/gfs2/inode.c | 9
-rw-r--r--  fs/gfs2/lock_dlm.c | 15
-rw-r--r--  fs/gfs2/ops_inode.c | 18
-rw-r--r--  fs/gfs2/quota.c | 13
-rw-r--r--  fs/gfs2/rgrp.c | 57
-rw-r--r--  fs/gfs2/rgrp.h | 1
-rw-r--r--  fs/gfs2/xattr.c | 23
-rw-r--r--  fs/logfs/journal.c | 2
-rw-r--r--  fs/logfs/readwrite.c | 3
-rw-r--r--  fs/ocfs2/aops.c | 7
-rw-r--r--  fs/ocfs2/aops.h | 23
-rw-r--r--  fs/ocfs2/cluster/masklog.c | 3
-rw-r--r--  fs/ocfs2/cluster/masklog.h | 15
-rw-r--r--  fs/ocfs2/dir.c | 4
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 40
-rw-r--r--  fs/ocfs2/file.c | 15
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 2
-rw-r--r--  fs/proc/base.c | 79
-rw-r--r--  include/linux/completion.h | 8
-rw-r--r--  include/linux/dmaengine.h | 13
-rw-r--r--  include/linux/ftrace_event.h | 14
-rw-r--r--  include/linux/hrtimer.h | 33
-rw-r--r--  include/linux/init_task.h | 18
-rw-r--r--  include/linux/kprobes.h | 4
-rw-r--r--  include/linux/kthread.h | 45
-rw-r--r--  include/linux/module.h | 11
-rw-r--r--  include/linux/mutex.h | 4
-rw-r--r--  include/linux/netlink.h | 2
-rw-r--r--  include/linux/nmi.h | 10
-rw-r--r--  include/linux/perf_event.h | 26
-rw-r--r--  include/linux/rculist.h | 5
-rw-r--r--  include/linux/rcupdate.h | 4
-rw-r--r--  include/linux/rcutiny.h | 13
-rw-r--r--  include/linux/rcutree.h | 2
-rw-r--r--  include/linux/sched.h | 51
-rw-r--r--  include/linux/sfi.h | 8
-rw-r--r--  include/linux/stacktrace.h | 4
-rw-r--r--  include/linux/syscalls.h | 10
-rw-r--r--  include/linux/taskstats.h | 3
-rw-r--r--  include/linux/timer.h | 32
-rw-r--r--  include/linux/timerqueue.h | 50
-rw-r--r--  include/linux/tracepoint.h | 33
-rw-r--r--  include/linux/unaligned/packed_struct.h | 6
-rw-r--r--  include/linux/workqueue.h | 8
-rw-r--r--  include/media/wm8775.h | 3
-rw-r--r--  include/net/flow.h | 1
-rw-r--r--  include/net/ip6_route.h | 10
-rw-r--r--  include/net/mac80211.h | 28
-rw-r--r--  include/net/pkt_cls.h | 4
-rw-r--r--  include/net/sch_generic.h | 6
-rw-r--r--  include/net/sock.h | 3
-rw-r--r--  include/trace/define_trace.h | 15
-rw-r--r--  include/trace/events/power.h | 98
-rw-r--r--  include/trace/events/syscalls.h | 4
-rw-r--r--  include/trace/ftrace.h | 21
-rw-r--r--  init/Kconfig | 68
-rw-r--r--  init/do_mounts.c | 2
-rw-r--r--  init/main.c | 5
-rw-r--r--  kernel/cpu.c | 18
-rw-r--r--  kernel/fork.c | 7
-rw-r--r--  kernel/futex.c | 235
-rw-r--r--  kernel/hrtimer.c | 83
-rw-r--r--  kernel/hw_breakpoint.c | 2
-rw-r--r--  kernel/irq/manage.c | 4
-rw-r--r--  kernel/kprobes.c | 565
-rw-r--r--  kernel/kthread.c | 13
-rw-r--r--  kernel/module.c | 171
-rw-r--r--  kernel/mutex.c | 2
-rw-r--r--  kernel/perf_event.c | 573
-rw-r--r--  kernel/posix-timers.c | 10
-rw-r--r--  kernel/power/suspend.c | 3
-rw-r--r--  kernel/printk.c | 8
-rw-r--r--  kernel/rcutiny.c | 105
-rw-r--r--  kernel/rcutiny_plugin.h | 433
-rw-r--r--  kernel/rcutorture.c | 270
-rw-r--r--  kernel/rcutree.c | 156
-rw-r--r--  kernel/rcutree.h | 61
-rw-r--r--  kernel/rcutree_plugin.h | 135
-rw-r--r--  kernel/rcutree_trace.c | 12
-rw-r--r--  kernel/sched.c | 640
-rw-r--r--  kernel/sched_autogroup.c | 238
-rw-r--r--  kernel/sched_autogroup.h | 32
-rw-r--r--  kernel/sched_clock.c | 2
-rw-r--r--  kernel/sched_debug.c | 91
-rw-r--r--  kernel/sched_fair.c | 322
-rw-r--r--  kernel/sched_features.h | 2
-rw-r--r--  kernel/sched_rt.c | 24
-rw-r--r--  kernel/softirq.c | 4
-rw-r--r--  kernel/srcu.c | 8
-rw-r--r--  kernel/sys.c | 4
-rw-r--r--  kernel/sysctl.c | 53
-rw-r--r--  kernel/sysctl_binary.c | 1
-rw-r--r--  kernel/taskstats.c | 57
-rw-r--r--  kernel/time/timecompare.c | 5
-rw-r--r--  kernel/time/timekeeping.c | 9
-rw-r--r--  kernel/time/timer_list.c | 8
-rw-r--r--  kernel/timer.c | 50
-rw-r--r--  kernel/trace/Kconfig | 15
-rw-r--r--  kernel/trace/power-traces.c | 5
-rw-r--r--  kernel/trace/ring_buffer.c | 9
-rw-r--r--  kernel/trace/trace_event_perf.c | 31
-rw-r--r--  kernel/trace/trace_events.c | 6
-rw-r--r--  kernel/trace/trace_export.c | 14
-rw-r--r--  kernel/trace/trace_selftest.c | 2
-rw-r--r--  kernel/user.c | 1
-rw-r--r--  kernel/watchdog.c | 14
-rw-r--r--  lib/Kconfig.debug | 3
-rw-r--r--  lib/Makefile | 2
-rw-r--r--  lib/timerqueue.c | 107
-rw-r--r--  mm/compaction.c | 1
-rw-r--r--  mm/memcontrol.c | 19
-rw-r--r--  mm/migrate.c | 2
-rw-r--r--  mm/nommu.c | 28
-rw-r--r--  mm/page-writeback.c | 2
-rw-r--r--  mm/percpu.c | 2
-rw-r--r--  net/bluetooth/rfcomm/core.c | 1
-rw-r--r--  net/bridge/br_multicast.c | 30
-rw-r--r--  net/bridge/br_stp_bpdu.c | 2
-rw-r--r--  net/can/bcm.c | 4
-rw-r--r--  net/core/fib_rules.c | 3
-rw-r--r--  net/core/sock.c | 47
-rw-r--r--  net/ipv4/fib_frontend.c | 10
-rw-r--r--  net/ipv4/route.c | 15
-rw-r--r--  net/ipv4/tcp_ipv4.c | 4
-rw-r--r--  net/ipv4/udp.c | 1
-rw-r--r--  net/ipv4/udplite.c | 1
-rw-r--r--  net/ipv6/addrconf.c | 4
-rw-r--r--  net/ipv6/ip6_output.c | 12
-rw-r--r--  net/ipv6/route.c | 7
-rw-r--r--  net/ipv6/udp.c | 1
-rw-r--r--  net/ipv6/udplite.c | 1
-rw-r--r--  net/ipv6/xfrm6_output.c | 16
-rw-r--r--  net/irda/af_irda.c | 18
-rw-r--r--  net/mac80211/ibss.c | 4
-rw-r--r--  net/mac80211/rx.c | 5
-rw-r--r--  net/mac80211/work.c | 5
-rw-r--r--  net/sched/sch_sfq.c | 20
-rw-r--r--  net/sctp/socket.c | 2
-rw-r--r--  scripts/Makefile.build | 13
-rw-r--r--  scripts/kconfig/menu.c | 14
-rwxr-xr-x  scripts/kernel-doc | 102
-rw-r--r--  security/integrity/ima/ima_policy.c | 2
-rw-r--r--  security/keys/request_key.c | 1
-rw-r--r--  sound/core/pcm_lib.c | 10
-rw-r--r--  sound/oss/soundcard.c | 4
-rw-r--r--  sound/pci/hda/hda_codec.c | 57
-rw-r--r--  sound/pci/hda/hda_intel.c | 1
-rw-r--r--  sound/pci/hda/patch_realtek.c | 64
-rw-r--r--  sound/pci/hda/patch_sigmatel.c | 5
-rw-r--r--  sound/soc/codecs/max98088.c | 10
-rw-r--r--  sound/soc/codecs/wm8523.c | 9
-rw-r--r--  sound/soc/codecs/wm8741.c | 10
-rw-r--r--  sound/soc/codecs/wm8753.c | 226
-rw-r--r--  sound/soc/codecs/wm8904.c | 37
-rw-r--r--  sound/soc/codecs/wm8940.c | 1
-rw-r--r--  sound/soc/codecs/wm8955.c | 31
-rw-r--r--  sound/soc/codecs/wm8960.c | 1
-rw-r--r--  sound/soc/codecs/wm8962.c | 45
-rw-r--r--  sound/soc/codecs/wm8971.c | 1
-rw-r--r--  sound/soc/codecs/wm9081.c | 1
-rw-r--r--  sound/soc/codecs/wm9090.c | 18
-rw-r--r--  tools/perf/Documentation/perf-annotate.txt | 37
-rw-r--r--  tools/perf/Documentation/perf-buildid-list.txt | 3
-rw-r--r--  tools/perf/Documentation/perf-diff.txt | 21
-rw-r--r--  tools/perf/Documentation/perf-kvm.txt | 8
-rw-r--r--  tools/perf/Documentation/perf-lock.txt | 15
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 4
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 22
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 55
-rw-r--r--  tools/perf/Documentation/perf-sched.txt | 18
-rw-r--r--  tools/perf/Documentation/perf-script-perl.txt (renamed from tools/perf/Documentation/perf-trace-perl.txt) | 28
-rw-r--r--  tools/perf/Documentation/perf-script-python.txt (renamed from tools/perf/Documentation/perf-trace-python.txt) | 88
-rw-r--r--  tools/perf/Documentation/perf-script.txt (renamed from tools/perf/Documentation/perf-trace.txt) | 61
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 44
-rw-r--r--  tools/perf/Documentation/perf-test.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-timechart.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 28
-rw-r--r--  tools/perf/MANIFEST | 1
-rw-r--r--  tools/perf/Makefile | 23
-rw-r--r--  tools/perf/bench/mem-memcpy-arch.h | 12
-rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm-def.h | 4
-rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm.S | 2
-rw-r--r--  tools/perf/bench/mem-memcpy.c | 219
-rw-r--r--  tools/perf/builtin-annotate.c | 10
-rw-r--r--  tools/perf/builtin-buildid-list.c | 6
-rw-r--r--  tools/perf/builtin-diff.c | 21
-rw-r--r--  tools/perf/builtin-inject.c | 41
-rw-r--r--  tools/perf/builtin-kmem.c | 27
-rw-r--r--  tools/perf/builtin-lock.c | 23
-rw-r--r--  tools/perf/builtin-probe.c | 5
-rw-r--r--  tools/perf/builtin-record.c | 210
-rw-r--r--  tools/perf/builtin-report.c | 23
-rw-r--r--  tools/perf/builtin-sched.c | 33
-rw-r--r--  tools/perf/builtin-script.c (renamed from tools/perf/builtin-trace.c) | 135
-rw-r--r--  tools/perf/builtin-stat.c | 531
-rw-r--r--  tools/perf/builtin-test.c | 110
-rw-r--r--  tools/perf/builtin-timechart.c | 132
-rw-r--r--  tools/perf/builtin-top.c | 237
-rw-r--r--  tools/perf/builtin.h | 2
-rw-r--r--  tools/perf/command-list.txt | 2
-rw-r--r--  tools/perf/feature-tests.mak | 4
-rw-r--r--  tools/perf/perf.c | 4
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.c | 2
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.xs | 4
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/README | 4
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm | 2
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm | 4
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm | 4
-rw-r--r--  tools/perf/scripts/perl/bin/failed-syscalls-report | 2
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-file-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-pid-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/rwtop-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/wakeup-latency-report | 5
-rw-r--r--  tools/perf/scripts/perl/bin/workqueue-stats-report | 6
-rw-r--r--  tools/perf/scripts/perl/check-perf-trace.pl | 2
-rw-r--r--  tools/perf/scripts/perl/rw-by-file.pl | 2
-rw-r--r--  tools/perf/scripts/perl/workqueue-stats.pl | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/Context.c | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py | 2
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 2
-rw-r--r--  tools/perf/scripts/python/bin/failed-syscalls-by-pid-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/futex-contention-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/netdev-times-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/sched-migration-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/sctop-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-by-pid-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-report | 2
-rw-r--r--  tools/perf/scripts/python/check-perf-trace.py | 2
-rw-r--r--  tools/perf/scripts/python/failed-syscalls-by-pid.py | 2
-rw-r--r--  tools/perf/scripts/python/sched-migration.py | 2
-rw-r--r--  tools/perf/scripts/python/sctop.py | 2
-rw-r--r--  tools/perf/scripts/python/syscall-counts-by-pid.py | 2
-rw-r--r--  tools/perf/scripts/python/syscall-counts.py | 2
-rw-r--r--  tools/perf/util/build-id.c | 7
-rw-r--r--  tools/perf/util/cpumap.c | 123
-rw-r--r--  tools/perf/util/cpumap.h | 10
-rw-r--r--  tools/perf/util/debug.c | 42
-rw-r--r--  tools/perf/util/debug.h | 2
-rw-r--r--  tools/perf/util/event.c | 358
-rw-r--r--  tools/perf/util/event.h | 30
-rw-r--r--  tools/perf/util/evsel.c | 186
-rw-r--r--  tools/perf/util/evsel.h | 115
-rw-r--r--  tools/perf/util/header.c | 68
-rw-r--r--  tools/perf/util/header.h | 5
-rw-r--r--  tools/perf/util/hist.c | 25
-rw-r--r--  tools/perf/util/hist.h | 2
-rw-r--r--  tools/perf/util/include/asm/cpufeature.h | 9
-rw-r--r--  tools/perf/util/include/asm/dwarf2.h | 11
-rw-r--r--  tools/perf/util/include/linux/bitops.h | 5
-rw-r--r--  tools/perf/util/include/linux/linkage.h | 13
-rw-r--r--  tools/perf/util/parse-events.c | 111
-rw-r--r--  tools/perf/util/parse-events.h | 19
-rw-r--r--  tools/perf/util/parse-options.h | 4
-rw-r--r--  tools/perf/util/probe-event.c | 249
-rw-r--r--  tools/perf/util/probe-finder.c | 127
-rw-r--r--  tools/perf/util/probe-finder.h | 6
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-perl.c | 6
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 4
-rw-r--r--  tools/perf/util/session.c | 579
-rw-r--r--  tools/perf/util/session.h | 26
-rw-r--r--  tools/perf/util/sort.c | 6
-rw-r--r--  tools/perf/util/string.c | 2
-rw-r--r--  tools/perf/util/symbol.c | 143
-rw-r--r--  tools/perf/util/symbol.h | 6
-rw-r--r--  tools/perf/util/thread.c | 43
-rw-r--r--  tools/perf/util/thread.h | 15
-rw-r--r--  tools/perf/util/trace-event-info.c | 30
-rw-r--r--  tools/perf/util/trace-event.h | 5
-rw-r--r--  tools/perf/util/ui/util.c | 16
-rw-r--r--  tools/perf/util/util.c | 17
-rw-r--r--  tools/perf/util/util.h | 1
-rw-r--r--  tools/perf/util/xyarray.c | 20
-rw-r--r--  tools/perf/util/xyarray.h | 20
575 files changed, 12699 insertions, 6822 deletions
diff --git a/CREDITS b/CREDITS
index 41d8e63d5165..494b6e4746d7 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2365,8 +2365,6 @@ E: acme@redhat.com
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: R. Brasílio Itiberê, 4270/1010 - Água Verde
-S: 80240-060 - Curitiba - Paraná
 S: Brazil

 N: Karsten Merker
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index a851118775d8..6a8c73f55b80 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -1,18 +1,22 @@
 CONFIG_RCU_TRACE debugfs Files and Formats


-The rcutree implementation of RCU provides debugfs trace output that
-summarizes counters and state.  This information is useful for debugging
-RCU itself, and can sometimes also help to debug abuses of RCU.
-The following sections describe the debugfs files and formats.
+The rcutree and rcutiny implementations of RCU provide debugfs trace
+output that summarizes counters and state.  This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+The following sections describe the debugfs files and formats, first
+for rcutree and next for rcutiny.


-Hierarchical RCU debugfs Files and Formats
+CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats

-This implementation of RCU provides three debugfs files under the
-top-level directory RCU: rcu/rcudata (which displays fields in struct
-rcu_data), rcu/rcugp (which displays grace-period counters), and
-rcu/rcuhier (which displays the struct rcu_node hierarchy).
+These implementations of RCU provide five debugfs files under the
+top-level directory RCU: rcu/rcudata (which displays fields in struct
+rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
+rcu/rcudata), rcu/rcugp (which displays grace-period counters),
+rcu/rcuhier (which displays the struct rcu_node hierarchy), and
+rcu/rcu_pending (which displays counts of the reasons that the
+rcu_pending() function decided that there was core RCU work to do).

 The output of "cat rcu/rcudata" looks as follows:

@@ -130,7 +134,8 @@ o "ci" is the number of RCU callbacks that have been invoked for
 	been registered in absence of CPU-hotplug activity.

 o "co" is the number of RCU callbacks that have been orphaned due to
-	this CPU going offline.
+	this CPU going offline.  These orphaned callbacks have been moved
+	to an arbitrarily chosen online CPU.

 o "ca" is the number of RCU callbacks that have been adopted due to
 	other CPUs going offline.  Note that ci+co-ca+ql is the number of
@@ -168,12 +173,12 @@ o "gpnum" is the number of grace periods that have started. It is

 The output of "cat rcu/rcuhier" looks as follows, with very long lines:

-c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
 1/1 .>. 0:127 ^0
 3/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3
 3/3f .>. 0:5 ^0    2/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3
 rcu_bh:
-c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
 0/1 .>. 0:127 ^0
 0/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3
 0/3f .>. 0:5 ^0    0/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3
@@ -212,11 +217,6 @@ o "fqlh" is the number of calls to force_quiescent_state() that
 	exited immediately (without even being counted in nfqs above)
 	due to contention on ->fqslock.

-o	"oqlen" is the number of callbacks on the "orphan" callback
-	list.  RCU callbacks are placed on this list by CPUs going
-	offline, and are "adopted" either by the CPU helping the outgoing
-	CPU or by the next rcu_barrier*() call, whichever comes first.
-
 o	Each element of the form "1/1 0:127 ^0" represents one struct
 	rcu_node.  Each line represents one level of the hierarchy, from
 	root to leaves.  It is best to think of the rcu_data structures
@@ -326,3 +326,115 @@ o "nn" is the number of times that this CPU needed nothing. Alert
 	readers will note that the rcu "nn" number for a given CPU very
 	closely matches the rcu_bh "np" number for that same CPU.  This
 	is due to short-circuit evaluation in rcu_pending().
+
+
+CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats
+
+These implementations of RCU provide a single debugfs file under the
+top-level directory RCU, namely rcu/rcudata, which displays fields in
+rcu_bh_ctrlblk, rcu_sched_ctrlblk and, for CONFIG_TINY_PREEMPT_RCU,
+rcu_preempt_ctrlblk.
+
+The output of "cat rcu/rcudata" is as follows:
+
+rcu_preempt: qlen=24 gp=1097669 g197/p197/c197 tasks=...
+             ttb=. btg=no ntb=184 neb=0 nnb=183 j=01f7 bt=0274
+             normal balk: nt=1097669 gt=0 bt=371 b=0 ny=25073378 nos=0
+             exp balk: bt=0 nos=0
+rcu_sched: qlen: 0
+rcu_bh: qlen: 0
+
+This is split into rcu_preempt, rcu_sched, and rcu_bh sections, with the
+rcu_preempt section appearing only in CONFIG_TINY_PREEMPT_RCU builds.
+The last three lines of the rcu_preempt section appear only in
+CONFIG_RCU_BOOST kernel builds.  The fields are as follows:
+
+o	"qlen" is the number of RCU callbacks currently waiting either
+	for an RCU grace period or waiting to be invoked.  This is the
+	only field present for rcu_sched and rcu_bh, due to the
+	short-circuiting of grace periods in those two cases.
+
+o	"gp" is the number of grace periods that have completed.
+
+o	"g197/p197/c197" displays the grace-period state, with the
+	"g" number being the number of grace periods that have started
+	(mod 256), the "p" number being the number of grace periods
+	that the CPU has responded to (also mod 256), and the "c"
+	number being the number of grace periods that have completed
+	(once again mod 256).
+
+	Why have both "gp" and "g"?  Because the data flowing into
+	"gp" is only present in a CONFIG_RCU_TRACE kernel.
+
+o	"tasks" is a set of bits.  The first bit is "T" if there are
+	currently tasks that have recently blocked within an RCU
+	read-side critical section, the second bit is "N" if any of the
+	aforementioned tasks are blocking the current RCU grace period,
+	and the third bit is "E" if any of the aforementioned tasks are
+	blocking the current expedited grace period.  Each bit is "."
+	if the corresponding condition does not hold.
+
+o	"ttb" is a single bit.  It is "B" if any of the blocked tasks
+	need to be priority boosted and "." otherwise.
+
+o	"btg" indicates whether boosting has been carried out during
+	the current grace period, with "exp" indicating that boosting
+	is in progress for an expedited grace period, "no" indicating
+	that boosting has not yet started for a normal grace period,
+	"begun" indicating that boosting has begun for a normal grace
+	period, and "done" indicating that boosting has completed for
+	a normal grace period.
+
+o	"ntb" is the total number of tasks subjected to RCU priority
+	boosting since boot.
+
+o	"neb" is the number of expedited grace periods that have had
+	to resort to RCU priority boosting since boot.
+
+o	"nnb" is the number of normal grace periods that have had
+	to resort to RCU priority boosting since boot.
+
+o	"j" is the low-order 12 bits of the jiffies counter in hexadecimal.
+
+o	"bt" is the low-order 12 bits of the value that the jiffies counter
+	will have at the next time that boosting is scheduled to begin.
+
+o	In the line beginning with "normal balk", the fields are as follows:
+
+	o	"nt" is the number of times that the system balked from
+		boosting because there were no blocked tasks to boost.
+		Note that the system will balk from boosting even if the
+		grace period is overdue when the currently running task
+		is looping within an RCU read-side critical section.
+		There is no point in boosting in this case, because
+		boosting a running task won't make it run any faster.
+
+	o	"gt" is the number of times that the system balked
+		from boosting because, although there were blocked tasks,
+		none of them were preventing the current grace period
+		from completing.
+
+	o	"bt" is the number of times that the system balked
+		from boosting because boosting was already in progress.
+
+	o	"b" is the number of times that the system balked from
+		boosting because boosting had already completed for
+		the grace period in question.
+
+	o	"ny" is the number of times that the system balked from
+		boosting because it was not yet time to start boosting
+		the grace period in question.
+
+	o	"nos" is the number of times that the system balked from
+		boosting for inexplicable ("not otherwise specified")
+		reasons.  This can actually happen due to races involving
+		increments of the jiffies counter.
+
+o	In the line beginning with "exp balk", the fields are as follows:
+
+	o	"bt" is the number of times that the system balked from
+		boosting because there were no blocked tasks to boost.
+
+	o	"nos" is the number of times that the system balked from
+		boosting for inexplicable ("not otherwise specified")
+		reasons.
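
Note: the files described above are ordinary debugfs files, readable like
any other file once debugfs is mounted.  A minimal user-space sketch
(assumes debugfs is mounted at /sys/kernel/debug and CONFIG_RCU_TRACE=y;
this is illustrative, not part of the commit):

	#include <stdio.h>

	int main(void)
	{
		/* the rcu/rcudata file documented in the sections above */
		FILE *f = fopen("/sys/kernel/debug/rcu/rcudata", "r");
		char line[256];

		if (!f) {
			perror("rcu/rcudata");	/* debugfs not mounted, or CONFIG_RCU_TRACE=n */
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* emits the formats documented above */
		fclose(f);
		return 0;
	}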
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index a2976a6de033..e9c77788a39d 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -516,6 +516,7 @@ int main(int argc, char *argv[])
 		default:
 			fprintf(stderr, "Unknown nla_type %d\n",
 				na->nla_type);
+		case TASKSTATS_TYPE_NULL:
 			break;
 		}
 		na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
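
Note: the hunk above relies on C switch fall-through.  A minimal sketch of
the resulting control flow (the TASKSTATS_TYPE_NULL constant is from the
hunk; the rest is illustrative, not the full getdelays.c loop):

	switch (na->nla_type) {
	default:
		/* genuinely unknown attribute types still warn... */
		fprintf(stderr, "Unknown nla_type %d\n", na->nla_type);
		/* ...then fall through to the shared break */
	case TASKSTATS_TYPE_NULL:
		/* NULL (padding) attributes jump straight here: no warning */
		break;
	}

so padding attributes are skipped silently while unknown types keep the
diagnostic.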
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index d9bcffd59433..470d3dba1a69 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -62,6 +62,10 @@ aic7*reg_print.c*
 aic7*seq.h*
 aicasm
 aicdb.h*
+altivec1.c
+altivec2.c
+altivec4.c
+altivec8.c
 asm-offsets.h
 asm_offsets.h
 autoconf.h*
@@ -76,6 +80,7 @@ btfixupprep
 build
 bvmlinux
 bzImage*
+capflags.c
 classlist.h*
 comp*.log
 compile.h*
@@ -94,6 +99,7 @@ devlist.h*
 docproc
 elf2ecoff
 elfconfig.h*
+evergreen_reg_safe.h
 fixdep
 flask.h
 fore200e_mkfirm
@@ -108,9 +114,16 @@ genksyms
 *_gray256.c
 ihex2fw
 ikconfig.h*
+inat-tables.c
 initramfs_data.cpio
 initramfs_data.cpio.gz
 initramfs_list
+int16.c
+int1.c
+int2.c
+int32.c
+int4.c
+int8.c
 kallsyms
 kconfig
 keywords.c
@@ -140,6 +153,7 @@ mkprep
 mktables
 mktree
 modpost
+modules.builtin
 modules.order
 modversions.h*
 ncscope.*
@@ -153,14 +167,23 @@ pca200e.bin
 pca200e_ecd.bin2
 piggy.gz
 piggyback
+piggy.S
 pnmtologo
 ppc_defs.h*
 pss_boot.h
 qconf
+r100_reg_safe.h
+r200_reg_safe.h
+r300_reg_safe.h
+r420_reg_safe.h
+r600_reg_safe.h
 raid6altivec*.c
 raid6int*.c
 raid6tables.c
 relocs
+rn50_reg_safe.h
+rs600_reg_safe.h
+rv515_reg_safe.h
 series
 setup
 setup.bin
@@ -169,6 +192,7 @@ sImage
 sm_tbl*
 split-include
 syscalltab.h
+tables.c
 tags
 tftpboot.img
 timeconst.h
@@ -190,6 +214,7 @@ vmlinux
 vmlinux-*
 vmlinux.aout
 vmlinux.lds
+voffset.h
 vsyscall.lds
 vsyscall_32.lds
 wanxlfw.inc
@@ -200,3 +225,4 @@ wakeup.elf
 wakeup.lds
 zImage*
 zconf.hash.c
+zoffset.h
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index b6426f15b4ae..33fa3e5d38fd 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -18,7 +18,6 @@ prototypes:
 	char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);

 locking rules:
-	none have BKL
 		dcache_lock	rename_lock	->d_lock	may block
 d_revalidate:	no		no		no		yes
 d_hash		no		no		no		yes
@@ -42,18 +41,23 @@ ata *);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
 	int (*readlink) (struct dentry *, char __user *,int);
-	int (*follow_link) (struct dentry *, struct nameidata *);
+	void * (*follow_link) (struct dentry *, struct nameidata *);
+	void (*put_link) (struct dentry *, struct nameidata *, void *);
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, struct nameidata *);
+	int (*check_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
+	void (*truncate_range)(struct inode *, loff_t, loff_t);
+	long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
+	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);

 locking rules:
-	all may block, none have BKL
+	all may block
 		i_mutex(inode)
 lookup:		yes
 create:		yes
@@ -66,19 +70,24 @@ rmdir: yes (both) (see below)
 rename:		yes (all)	(see below)
 readlink:	no
 follow_link:	no
+put_link:	no
 truncate:	yes		(see below)
 setattr:	yes
 permission:	no
+check_acl:	no
 getattr:	no
 setxattr:	yes
 getxattr:	no
 listxattr:	no
 removexattr:	yes
+truncate_range:	yes
+fallocate:	no
+fiemap:		no
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
 	cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
 	->truncate() is never called directly - it's a callback, not a
-method. It's called by vmtruncate() - library function normally used by
+method. It's called by vmtruncate() - deprecated library function used by
 ->setattr(). Locking information above applies to that call (i.e. is
 inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been
 passed).
@@ -91,7 +100,7 @@ prototypes:
 	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
 	void (*dirty_inode) (struct inode *);
-	int (*write_inode) (struct inode *, int);
+	int (*write_inode) (struct inode *, struct writeback_control *wbc);
 	int (*drop_inode) (struct inode *);
 	void (*evict_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
@@ -105,10 +114,10 @@ prototypes:
 	int (*show_options)(struct seq_file *, struct vfsmount *);
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);

 locking rules:
 	All may block [not true, see below]
-	None have BKL
 			s_umount
 alloc_inode:
 destroy_inode:
@@ -127,6 +136,7 @@ umount_begin: no
 show_options:		no	(namespace_sem)
 quota_read:		no	(see below)
 quota_write:		no	(see below)
+bdev_try_to_free_page:	no	(see below)

 ->statfs() has s_umount (shared) when called by ustat(2) (native or
 compat), but that's an accident of bad API; s_umount is used to pin
@@ -139,19 +149,25 @@ be the only ones operating on the quota file by the quota code (via
 dqio_sem) (unless an admin really wants to screw up something and
 writes to quota files with quotas on). For other details about locking
 see also dquot_operations section.
+->bdev_try_to_free_page is called from the ->releasepage handler of
+the block device inode. See there for more details.

 --------------------------- file_system_type ---------------------------
 prototypes:
 	int (*get_sb) (struct file_system_type *, int,
 		       const char *, void *, struct vfsmount *);
+	struct dentry *(*mount) (struct file_system_type *, int,
+		       const char *, void *);
 	void (*kill_sb) (struct super_block *);
 locking rules:
-		may block	BKL
-get_sb		yes		no
-kill_sb		yes		no
+		may block
+get_sb		yes
+mount		yes
+kill_sb		yes

 ->get_sb() returns error or 0 with locked superblock attached to the vfsmount
 (exclusive on ->s_umount).
+->mount() returns ERR_PTR or the root dentry.
 ->kill_sb() takes a write-locked superblock, does all shutdown work on it,
 unlocks and drops the reference.

@@ -176,27 +192,35 @@ prototypes:
 	void (*freepage)(struct page *);
 	int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
-	int (*launder_page) (struct page *);
+	int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
+				unsigned long *);
+	int (*migratepage)(struct address_space *, struct page *, struct page *);
+	int (*launder_page)(struct page *);
+	int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
+	int (*error_remove_page)(struct address_space *, struct page *);

 locking rules:
 	All except set_page_dirty and freepage may block

-			BKL	PageLocked(page)	i_mutex
-writepage:		no	yes, unlocks (see below)
-readpage:		no	yes, unlocks
-sync_page:		no	maybe
-writepages:		no
-set_page_dirty		no	no
-readpages:		no
-write_begin:		no	locks the page		yes
-write_end:		no	yes, unlocks		yes
-perform_write:		no	n/a			yes
-bmap:			no
-invalidatepage:		no	yes
-releasepage:		no	yes
-freepage:		no	yes
-direct_IO:		no
-launder_page:		no	yes
+			PageLocked(page)	i_mutex
+writepage:		yes, unlocks (see below)
+readpage:		yes, unlocks
+sync_page:		maybe
+writepages:
+set_page_dirty		no
+readpages:
+write_begin:		locks the page		yes
+write_end:		yes, unlocks		yes
+bmap:
+invalidatepage:		yes
+releasepage:		yes
+freepage:		yes
+direct_IO:
+get_xip_mem:		maybe
+migratepage:		yes (both)
+launder_page:		yes
+is_partially_uptodate:	yes
+error_remove_page:	yes

 	->write_begin(), ->write_end(), ->sync_page() and ->readpage()
 may be called from the request handler (/dev/loop).
@@ -276,9 +300,8 @@ under spinlock (it cannot block) and is sometimes called with the page
 not locked.

 	->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
-filesystems and by the swapper. The latter will eventually go away. All
-instances do not actually need the BKL. Please, keep it that way and don't
-breed new callers.
+filesystems and by the swapper. The latter will eventually go away. Please,
+keep it that way and don't breed new callers.

 	->invalidatepage() is called when the filesystem must attempt to drop
 some or all of the buffers from the page when it is being truncated. It
@@ -299,47 +322,37 @@ cleaned, or an error value if not. Note that in order to prevent the page
 getting mapped back in and redirtied, it needs to be kept locked
 across the entire operation.

-	Note: currently almost all instances of address_space methods are
-using BKL for internal serialization and that's one of the worst sources
-of contention. Normally they are calling library functions (in fs/buffer.c)
-and pass foo_get_block() as a callback (on local block-based filesystems,
-indeed). BKL is not needed for library stuff and is usually taken by
-foo_get_block(). It's an overkill, since block bitmaps can be protected by
-internal fs locking and real critical areas are much smaller than the areas
-filesystems protect now.
-
 ----------------------- file_lock_operations ------------------------------
 prototypes:
-	void (*fl_insert)(struct file_lock *);	/* lock insertion callback */
-	void (*fl_remove)(struct file_lock *);	/* lock removal callback */
 	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
 	void (*fl_release_private)(struct file_lock *);


 locking rules:
-			BKL	may block
-fl_insert:		yes	no
-fl_remove:		yes	no
-fl_copy_lock:		yes	no
-fl_release_private:	yes	yes
+			file_lock_lock	may block
+fl_copy_lock:		yes		no
+fl_release_private:	maybe		no

 ----------------------- lock_manager_operations ---------------------------
 prototypes:
 	int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
 	void (*fl_notify)(struct file_lock *);	/* unblock callback */
+	int (*fl_grant)(struct file_lock *, struct file_lock *, int);
 	void (*fl_release_private)(struct file_lock *);
 	void (*fl_break)(struct file_lock *); /* break_lease callback */
+	int (*fl_mylease)(struct file_lock *, struct file_lock *);
+	int (*fl_change)(struct file_lock **, int);

 locking rules:
-			BKL	may block
-fl_compare_owner:	yes	no
-fl_notify:		yes	no
-fl_release_private:	yes	yes
-fl_break:		yes	no
-
-	Currently only NFSD and NLM provide instances of this class. None of the
-them block. If you have out-of-tree instances - please, show up. Locking
-in that area will change.
+			file_lock_lock	may block
+fl_compare_owner:	yes		no
+fl_notify:		yes		no
+fl_grant:		no		no
+fl_release_private:	maybe		no
+fl_break:		yes		no
+fl_mylease:		yes		no
+fl_change		yes		no
+
 --------------------------- buffer_head -----------------------------------
 prototypes:
 	void (*b_end_io)(struct buffer_head *bh, int uptodate);
@@ -364,17 +377,17 @@ prototypes:
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);

 locking rules:
-			BKL	bd_mutex
-open:			no	yes
-release:		no	yes
-ioctl:			no	no
-compat_ioctl:		no	no
-direct_access:		no	no
-media_changed:		no	no
-unlock_native_capacity:	no	no
-revalidate_disk:	no	no
-getgeo:			no	no
-swap_slot_free_notify:	no	no	(see below)
+			bd_mutex
+open:			yes
+release:		yes
+ioctl:			no
+compat_ioctl:		no
+direct_access:		no
+media_changed:		no
+unlock_native_capacity:	no
+revalidate_disk:	no
+getgeo:			no
+swap_slot_free_notify:	no	(see below)

 media_changed, unlock_native_capacity and revalidate_disk are called only from
 check_disk_change().
@@ -413,34 +426,21 @@ prototypes:
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
 			unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
+	int (*flock) (struct file *, int, struct file_lock *);
+	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
+			size_t, unsigned int);
+	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
+			size_t, unsigned int);
+	int (*setlease)(struct file *, long, struct file_lock **);
 };

 locking rules:
-	All may block.
-			BKL
-llseek:			no	(see below)
-read:			no
-aio_read:		no
-write:			no
-aio_write:		no
-readdir:		no
-poll:			no
-unlocked_ioctl:		no
-compat_ioctl:		no
-mmap:			no
-open:			no
-flush:			no
-release:		no
-fsync:			no	(see below)
-aio_fsync:		no
-fasync:			no
-lock:			yes
-readv:			no
-writev:			no
-sendfile:		no
-sendpage:		no
-get_unmapped_area:	no
-check_flags:		no
+	All may block except for ->setlease.
+	No VFS locks held on entry except for ->fsync and ->setlease.
+
+->fsync() has i_mutex on inode.
+
+->setlease has the file_list_lock held and must not sleep.

 	->llseek() locking has moved from llseek to the individual llseek
 implementations. If your fs is not using generic_file_llseek, you
@@ -450,17 +450,10 @@ mutex or just to use i_size_read() instead.
 Note: this does not protect the file->f_pos against concurrent modifications
 since this is something the userspace has to take care about.

-Note: ext2_release() was *the* source of contention on fs-intensive
-loads and dropping BKL on ->release() helps to get rid of that (we still
-grab BKL for cases when we close a file that had been opened r/w, but that
-can and should be done using the internal locking with smaller critical areas).
-Current worst offender is ext2_get_block()...
-
-->fasync() is called without BKL protection, and is responsible for
-maintaining the FASYNC bit in filp->f_flags.  Most instances call
-fasync_helper(), which does that maintenance, so it's not normally
-something one needs to worry about.  Return values > 0 will be mapped to
-zero in the VFS layer.
+->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
+Most instances call fasync_helper(), which does that maintenance, so it's
+not normally something one needs to worry about.  Return values > 0 will be
+mapped to zero in the VFS layer.

 ->readdir() and ->ioctl() on directories must be changed. Ideally we would
 move ->readdir() to inode_operations and use a separate method for directory
@@ -471,8 +464,6 @@ components. And there are other reasons why the current interface is a mess...
471->read on directories probably must go away - we should just enforce -EISDIR 464->read on directories probably must go away - we should just enforce -EISDIR
472in sys_read() and friends. 465in sys_read() and friends.
473 466
474->fsync() has i_mutex on inode.
475
476--------------------------- dquot_operations ------------------------------- 467--------------------------- dquot_operations -------------------------------
477prototypes: 468prototypes:
478 int (*write_dquot) (struct dquot *); 469 int (*write_dquot) (struct dquot *);
@@ -507,12 +498,12 @@ prototypes:
507 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); 498 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
508 499
509locking rules: 500locking rules:
510 BKL mmap_sem PageLocked(page) 501 mmap_sem PageLocked(page)
511open: no yes 502open: yes
512close: no yes 503close: yes
513fault: no yes can return with page locked 504fault: yes can return with page locked
514page_mkwrite: no yes can return with page locked 505page_mkwrite: yes can return with page locked
515access: no yes 506access: yes
516 507
517 ->fault() is called when a previously not present pte is about 508 ->fault() is called when a previously not present pte is about
518to be faulted in. The filesystem must find and return the page associated 509to be faulted in. The filesystem must find and return the page associated
@@ -539,6 +530,3 @@ VM_IO | VM_PFNMAP VMAs.
539 530
540(if you break something or notice that it is broken and do not fix it yourself 531(if you break something or notice that it is broken and do not fix it yourself
541- at least put it here) 532- at least put it here)
542
543ipc/shm.c::shm_delete() - may need BKL.
544->read() and ->write() in many drivers are (probably) missing BKL.
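As a short illustration of the ->fasync() rule above: a minimal driver
method built on fasync_helper() (the "mydev" structure and its
async_queue member are hypothetical, not part of this patch):

	#include <linux/fs.h>

	struct mydev {				/* hypothetical device state */
		struct fasync_struct *async_queue;
	};

	static int mydev_fasync(int fd, struct file *filp, int on)
	{
		struct mydev *dev = filp->private_data;

		/* fasync_helper() maintains the FASYNC bit in filp->f_flags
		 * and the fasync list; returns > 0 are mapped to 0 by the
		 * VFS, so its result can be passed straight up. */
		return fasync_helper(fd, filp, on, &dev->async_queue);
	}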
diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt
index 715eaaf1519d..9a8674629a07 100644
--- a/Documentation/kernel-docs.txt
+++ b/Documentation/kernel-docs.txt
@@ -537,7 +537,7 @@
537 Notes: Further information in 537 Notes: Further information in
538 http://www.oreilly.com/catalog/linuxdrive2/ 538 http://www.oreilly.com/catalog/linuxdrive2/
539 539
540 * Title: "Linux Device Drivers, 3nd Edition" 540 * Title: "Linux Device Drivers, 3rd Edition"
541 Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman 541 Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman
542 Publisher: O'Reilly & Associates. 542 Publisher: O'Reilly & Associates.
543 Date: 2005. 543 Date: 2005.
@@ -592,14 +592,6 @@
592 Pages: 600. 592 Pages: 600.
593 ISBN: 0-13-101908-2 593 ISBN: 0-13-101908-2
594 594
595 * Title: "The Design and Implementation of the 4.4 BSD UNIX
596 Operating System"
597 Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels,
598 John S. Quarterman.
599 Publisher: Addison-Wesley.
600 Date: 1996.
601 ISBN: 0-201-54979-4
602
603 * Title: "Programming for the real world - POSIX.4" 595 * Title: "Programming for the real world - POSIX.4"
604 Author: Bill O. Gallmeister. 596 Author: Bill O. Gallmeister.
605 Publisher: O'Reilly & Associates, Inc.. 597 Publisher: O'Reilly & Associates, Inc..
@@ -610,28 +602,13 @@
610 POSIX. Good reference. 602 POSIX. Good reference.
611 603
612 * Title: "UNIX Systems for Modern Architectures: Symmetric 604 * Title: "UNIX Systems for Modern Architectures: Symmetric
613 Multiprocesssing and Caching for Kernel Programmers" 605 Multiprocessing and Caching for Kernel Programmers"
614 Author: Curt Schimmel. 606 Author: Curt Schimmel.
615 Publisher: Addison Wesley. 607 Publisher: Addison Wesley.
616 Date: June, 1994. 608 Date: June, 1994.
617 Pages: 432. 609 Pages: 432.
618 ISBN: 0-201-63338-8 610 ISBN: 0-201-63338-8
619 611
620 * Title: "The Design and Implementation of the 4.3 BSD UNIX
621 Operating System"
622 Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J.
623 Karels, John S. Quarterman.
624 Publisher: Addison-Wesley.
625 Date: 1989 (reprinted with corrections on October, 1990).
626 ISBN: 0-201-06196-1
627
628 * Title: "The Design of the UNIX Operating System"
629 Author: Maurice J. Bach.
630 Publisher: Prentice Hall.
631 Date: 1986.
632 Pages: 471.
633 ISBN: 0-13-201757-1
634
635 MISCELLANEOUS: 612 MISCELLANEOUS:
636 613
637 * Name: linux/Documentation 614 * Name: linux/Documentation
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8b61c9360999..f3dc951e949f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1579,20 +1579,12 @@ and is between 256 and 4096 characters. It is defined in the file
1579 1579
1580 nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels 1580 nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
1581 Format: [panic,][num] 1581 Format: [panic,][num]
1582 Valid num: 0,1,2 1582 Valid num: 0
1583 0 - turn nmi_watchdog off 1583 0 - turn nmi_watchdog off
1584 1 - use the IO-APIC timer for the NMI watchdog
1585 2 - use the local APIC for the NMI watchdog using
1586 a performance counter. Note: This will use one
1587 performance counter and the local APIC's performance
1588 vector.
1589 When panic is specified, panic when an NMI watchdog 1584 When panic is specified, panic when an NMI watchdog
1590 timeout occurs. 1585 timeout occurs.
1591 This is useful when you use a panic=... timeout and 1586 This is useful when you use a panic=... timeout and
1592 need the box quickly up again. 1587 need the box quickly up again.
1593 Instead of 1 and 2 it is possible to use the following
1594 symbolic names: lapic and ioapic
1595 Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
1596 1588
1597 netpoll.carrier_timeout= 1589 netpoll.carrier_timeout=
1598 [NET] Specifies amount of time (in seconds) that 1590 [NET] Specifies amount of time (in seconds) that
@@ -1622,6 +1614,8 @@ and is between 256 and 4096 characters. It is defined in the file
1622 noapic [SMP,APIC] Tells the kernel to not make use of any 1614 noapic [SMP,APIC] Tells the kernel to not make use of any
1623 IOAPICs that may be present in the system. 1615 IOAPICs that may be present in the system.
1624 1616
1617 noautogroup Disable scheduler automatic task group creation.
1618
1625 nobats [PPC] Do not use BATs for mapping kernel lowmem 1619 nobats [PPC] Do not use BATs for mapping kernel lowmem
1626 on "Classic" PPC cores. 1620 on "Classic" PPC cores.
1627 1621
@@ -1759,7 +1753,7 @@ and is between 256 and 4096 characters. It is defined in the file
1759 1753
1760 nousb [USB] Disable the USB subsystem 1754 nousb [USB] Disable the USB subsystem
1761 1755
1762 nowatchdog [KNL] Disable the lockup detector. 1756 nowatchdog [KNL] Disable the lockup detector (NMI watchdog).
1763 1757
1764 nowb [ARM] 1758 nowb [ARM]
1765 1759
@@ -2467,12 +2461,13 @@ and is between 256 and 4096 characters. It is defined in the file
2467 to facilitate early boot debugging. 2461 to facilitate early boot debugging.
2468 See also Documentation/trace/events.txt 2462 See also Documentation/trace/events.txt
2469 2463
2470 tsc= Disable clocksource-must-verify flag for TSC. 2464 tsc= Disable clocksource stability checks for TSC.
2471 Format: <string> 2465 Format: <string>
2472 [x86] reliable: mark tsc clocksource as reliable, this 2466 [x86] reliable: mark tsc clocksource as reliable, this
2473 disables clocksource verification at runtime. 2467 disables clocksource verification at runtime, as well
2474 Used to enable high-resolution timer mode on older 2468 as the stability checks done at bootup. Used to enable
2475 hardware, and in virtualized environment. 2469 high-resolution timer mode on older hardware, and in
2470 virtualized environment.
2476 [x86] noirqtime: Do not use TSC to do irq accounting. 2471 [x86] noirqtime: Do not use TSC to do irq accounting.
2477 Used to run time disable IRQ_TIME_ACCOUNTING on any 2472 Used to run time disable IRQ_TIME_ACCOUNTING on any
2478 platforms where RDTSC is slow and this accounting 2473 platforms where RDTSC is slow and this accounting
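Putting the parameters above together, one plausible command line
(values are illustrative only, not recommendations) might read:

	nmi_watchdog=panic panic=30 tsc=reliable

i.e. panic when the NMI watchdog fires, reboot 30 seconds after any
panic, and skip runtime clocksource verification of the TSC.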
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 570ef2b3d79b..df322c103466 100644
--- a/Documentation/scsi/scsi_mid_low_api.txt
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -1044,9 +1044,9 @@ Details:
1044 1044
1045 1045
1046/** 1046/**
1047 * queuecommand - queue scsi command, invoke 'done' on completion 1047 * queuecommand - queue scsi command, invoke scp->scsi_done on completion
1048 * @shost: pointer to the scsi host object
1048 * @scp: pointer to scsi command object 1049 * @scp: pointer to scsi command object
1049 * @done: function pointer to be invoked on completion
1050 * 1050 *
1051 * Returns 0 on success. 1051 * Returns 0 on success.
1052 * 1052 *
@@ -1074,42 +1074,45 @@ Details:
1074 * 1074 *
1075 * Other types of errors that are detected immediately may be 1075 * Other types of errors that are detected immediately may be
1076 * flagged by setting scp->result to an appropriate value, 1076 * flagged by setting scp->result to an appropriate value,
1077 * invoking the 'done' callback, and then returning 0 from this 1077 * invoking the scp->scsi_done callback, and then returning 0
1078 * function. If the command is not performed immediately (and the 1078 * from this function. If the command is not performed
1079 * LLD is starting (or will start) the given command) then this 1079 * immediately (and the LLD is starting (or will start) the given
1080 * function should place 0 in scp->result and return 0. 1080 * command) then this function should place 0 in scp->result and
1081 * return 0.
1081 * 1082 *
1082 * Command ownership. If the driver returns zero, it owns the 1083 * Command ownership. If the driver returns zero, it owns the
1083 * command and must take responsibility for ensuring the 'done' 1084 * command and must take responsibility for ensuring the
1084 * callback is executed. Note: the driver may call done before 1085 * scp->scsi_done callback is executed. Note: the driver may
1085 * returning zero, but after it has called done, it may not 1086 * call scp->scsi_done before returning zero, but after it has
1086 * return any value other than zero. If the driver makes a 1087 * called scp->scsi_done, it may not return any value other than
1087 * non-zero return, it must not execute the command's done 1088 * zero. If the driver makes a non-zero return, it must not
1088 * callback at any time. 1089 * execute the command's scsi_done callback at any time.
1089 * 1090 *
1090 * Locks: struct Scsi_Host::host_lock held on entry (with "irqsave") 1091 * Locks: up to and including 2.6.36, struct Scsi_Host::host_lock
1091 * and is expected to be held on return. 1092 * held on entry (with "irqsave") and is expected to be
1093 * held on return. From 2.6.37 onwards, queuecommand is
1094 * called without any locks held.
1092 * 1095 *
1093 * Calling context: in interrupt (soft irq) or process context 1096 * Calling context: in interrupt (soft irq) or process context
1094 * 1097 *
1095 * Notes: This function should be relatively fast. Normally it will 1098 * Notes: This function should be relatively fast. Normally it
1096 * not wait for IO to complete. Hence the 'done' callback is invoked 1099 * will not wait for IO to complete. Hence the scp->scsi_done
1097 * (often directly from an interrupt service routine) some time after 1100 * callback is invoked (often directly from an interrupt service
1098 * this function has returned. In some cases (e.g. pseudo adapter 1101 * routine) some time after this function has returned. In some
1099 * drivers that manufacture the response to a SCSI INQUIRY) 1102 * cases (e.g. pseudo adapter drivers that manufacture the
1100 * the 'done' callback may be invoked before this function returns. 1103 * response to a SCSI INQUIRY) the scp->scsi_done callback may be
1101 * If the 'done' callback is not invoked within a certain period 1104 * invoked before this function returns. If the scp->scsi_done
1102 * the SCSI mid level will commence error processing. 1105 * callback is not invoked within a certain period the SCSI mid
1103 * If a status of CHECK CONDITION is placed in "result" when the 1106 * level will commence error processing. If a status of CHECK
1104 * 'done' callback is invoked, then the LLD driver should 1107 * CONDITION is placed in "result" when the scp->scsi_done
1105 * perform autosense and fill in the struct scsi_cmnd::sense_buffer 1108 * callback is invoked, then the LLD driver should perform
1109 * autosense and fill in the struct scsi_cmnd::sense_buffer
1106 * array. The scsi_cmnd::sense_buffer array is zeroed prior to 1110 * array. The scsi_cmnd::sense_buffer array is zeroed prior to
1107 * the mid level queuing a command to an LLD. 1111 * the mid level queuing a command to an LLD.
1108 * 1112 *
1109 * Defined in: LLD 1113 * Defined in: LLD
1110 **/ 1114 **/
1111 int queuecommand(struct scsi_cmnd * scp, 1115 int queuecommand(struct Scsi_Host *shost, struct scsi_cmnd * scp)
1112 void (*done)(struct scsi_cmnd *))
1113 1116
1114 1117
1115/** 1118/**
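To make the 2.6.37 convention above concrete, a minimal sketch of an
LLD's queuecommand (the "mydrv" host structure and mydrv_* helpers are
hypothetical; only the calling convention is taken from this patch):

	#include <scsi/scsi.h>
	#include <scsi/scsi_cmnd.h>
	#include <scsi/scsi_host.h>

	static int mydrv_queuecommand(struct Scsi_Host *shost,
				      struct scsi_cmnd *scp)
	{
		struct mydrv_host *host = shost_priv(shost);

		/* no host_lock is held here from 2.6.37 onwards */
		if (mydrv_queue_full(host))		/* hypothetical */
			return SCSI_MLQUEUE_HOST_BUSY;	/* mid level retries */

		if (mydrv_start_io(host, scp) < 0) {	/* hypothetical */
			/* immediate failure: flag it, complete, return 0 */
			scp->result = DID_ERROR << 16;
			scp->scsi_done(scp);
			return 0;
		}

		/* command now owned by the LLD; scp->scsi_done will be
		 * invoked later, typically from the interrupt handler */
		return 0;
	}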
diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt
new file mode 100644
index 000000000000..96d87b67fe37
--- /dev/null
+++ b/Documentation/trace/events-power.txt
@@ -0,0 +1,90 @@
1
2 Subsystem Trace Points: power
3
4The power tracing system captures events related to power transitions
5within the kernel. Broadly speaking there are three major subheadings:
6
7 o Power state switch which reports events related to suspend (S-states),
8 cpuidle (C-states) and cpufreq (P-states)
9 o System clock related changes
10 o Power domains related changes and transitions
11

12This document describes what each of the tracepoints is and why they
13might be useful.
14
15Cf. include/trace/events/power.h for the event definitions.
16
171. Power state switch events
18============================
19
201.1 New trace API
21-----------------
22
23A 'cpu' event class gathers the CPU-related events: cpuidle and
24cpufreq.
25
26cpu_idle "state=%lu cpu_id=%lu"
27cpu_frequency "state=%lu cpu_id=%lu"
28
29A suspend event is used to indicate the system going in and out of the
30suspend mode:
31
32machine_suspend "state=%lu"
33
34
35Note: the value of '-1' or '4294967295' for state means an exit from the current state,
36i.e. trace_cpu_idle(4, smp_processor_id()) means that the system
37enters the idle state 4, while trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id())
38means that the system exits the previous idle state.
39
40The event with 'state=4294967295' in the trace is important to the user space
41tools, which rely on it to detect the end of the current state, draw the
42state diagrams correctly and calculate accurate statistics.
43
441.2 DEPRECATED trace API
45------------------------
46
47A new Kconfig option CONFIG_EVENT_POWER_TRACING_DEPRECATED with the default value of
48'y' has been created. This allows the legacy trace power API to be used
49alongside the new trace API.
50The Kconfig option, the old trace API (in include/trace/events/power.h) and the
51old trace points will disappear in a future release (namely 2.6.41).
52
53power_start "type=%lu state=%lu cpu_id=%lu"
54power_frequency "type=%lu state=%lu cpu_id=%lu"
55power_end "cpu_id=%lu"
56
57The 'type' parameter takes one of these macros:
58 . POWER_NONE = 0,
59 . POWER_CSTATE = 1, /* C-State */
60 . POWER_PSTATE = 2, /* Frequency change or DVFS */
61
62The 'state' parameter is set depending on the type:
63 . Target C-state for type=POWER_CSTATE,
64 . Target frequency for type=POWER_PSTATE,
65
66power_end is used to indicate the exit of a state, corresponding to the latest
67power_start event.
68
692. Clocks events
70================
71The clock events are used for clock enable/disable and for
72clock rate change.
73
74clock_enable "%s state=%lu cpu_id=%lu"
75clock_disable "%s state=%lu cpu_id=%lu"
76clock_set_rate "%s state=%lu cpu_id=%lu"
77
78The first parameter gives the clock name (e.g. "gpio1_iclk").
79The second parameter is '1' for enable, '0' for disable, or the target
80clock rate for set_rate.
81
823. Power domains events
83=======================
84The power domain events are used for power domain transitions.
85
86power_domain_target "%s state=%lu cpu_id=%lu"
87
88The first parameter gives the power domain name (e.g. "mpu_pwrdm").
89The second parameter is the power domain target state.
90
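A sketch of how an idle path might emit the cpu_idle pair described in
section 1.1 above (do_low_power_wait() is a hypothetical stand-in for
the actual C-state entry):

	#include <linux/smp.h>
	#include <trace/events/power.h>

	static void my_enter_idle(unsigned long state)
	{
		trace_cpu_idle(state, smp_processor_id());	/* enter */
		do_low_power_wait();
		/* emit state=4294967295 so tools see the state end */
		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
	}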
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index b3e73ddb1567..12cecc83cd91 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -373,9 +373,18 @@ EVENT_PROCESS:
373 print " $regex_lru_isolate/o\n"; 373 print " $regex_lru_isolate/o\n";
374 next; 374 next;
375 } 375 }
376 my $isolate_mode = $1;
376 my $nr_scanned = $4; 377 my $nr_scanned = $4;
377 my $nr_contig_dirty = $7; 378 my $nr_contig_dirty = $7;
378 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; 379
380 # To match vmstat scanning statistics more closely, only count isolate_both
381 # and isolate_inactive as scanning; isolate_active is rotation, not scanning.
382 # isolate_inactive == 0
383 # isolate_active == 1
384 # isolate_both == 2
385 if ($isolate_mode != 1) {
386 $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
387 }
379 $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty; 388 $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty;
380 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") { 389 } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
381 $details = $5; 390 $details = $5;
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 30b43e1b2697..bdeb81ccb5f6 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -600,6 +600,7 @@ Protocol: 2.07+
600 0x00000001 lguest 600 0x00000001 lguest
601 0x00000002 Xen 601 0x00000002 Xen
602 0x00000003 Moorestown MID 602 0x00000003 Moorestown MID
603 0x00000004 CE4100 TV Platform
603 604
604Field name: hardware_subarch_data 605Field name: hardware_subarch_data
605Type: write (subarch-dependent) 606Type: write (subarch-dependent)
diff --git a/MAINTAINERS b/MAINTAINERS
index 6a588873cf8d..c5c7292daba0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -405,7 +405,7 @@ S: Supported
405F: drivers/usb/gadget/amd5536udc.* 405F: drivers/usb/gadget/amd5536udc.*
406 406
407AMD GEODE PROCESSOR/CHIPSET SUPPORT 407AMD GEODE PROCESSOR/CHIPSET SUPPORT
408P: Jordan Crouse 408P: Andres Salomon <dilinger@queued.net>
409L: linux-geode@lists.infradead.org (moderated for non-subscribers) 409L: linux-geode@lists.infradead.org (moderated for non-subscribers)
410W: http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html 410W: http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
411S: Supported 411S: Supported
@@ -792,11 +792,14 @@ S: Maintained
792 792
793ARM/NOMADIK ARCHITECTURE 793ARM/NOMADIK ARCHITECTURE
794M: Alessandro Rubini <rubini@unipv.it> 794M: Alessandro Rubini <rubini@unipv.it>
795M: Linus Walleij <linus.walleij@stericsson.com>
795M: STEricsson <STEricsson_nomadik_linux@list.st.com> 796M: STEricsson <STEricsson_nomadik_linux@list.st.com>
796L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) 797L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
797S: Maintained 798S: Maintained
798F: arch/arm/mach-nomadik/ 799F: arch/arm/mach-nomadik/
799F: arch/arm/plat-nomadik/ 800F: arch/arm/plat-nomadik/
801F: drivers/i2c/busses/i2c-nomadik.c
802T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
800 803
801ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT 804ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT
802M: Nelson Castillo <arhuaco@freaks-unidos.net> 805M: Nelson Castillo <arhuaco@freaks-unidos.net>
@@ -998,12 +1001,24 @@ F: drivers/i2c/busses/i2c-stu300.c
998F: drivers/rtc/rtc-coh901331.c 1001F: drivers/rtc/rtc-coh901331.c
999F: drivers/watchdog/coh901327_wdt.c 1002F: drivers/watchdog/coh901327_wdt.c
1000F: drivers/dma/coh901318* 1003F: drivers/dma/coh901318*
1004F: drivers/mfd/ab3100*
1005F: drivers/rtc/rtc-ab3100.c
1006F: drivers/rtc/rtc-coh901331.c
1007T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
1001 1008
1002ARM/U8500 ARM ARCHITECTURE 1009ARM/Ux500 ARM ARCHITECTURE
1003M: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com> 1010M: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
1011M: Linus Walleij <linus.walleij@stericsson.com>
1004L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) 1012L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
1005S: Maintained 1013S: Maintained
1006F: arch/arm/mach-ux500/ 1014F: arch/arm/mach-ux500/
1015F: drivers/dma/ste_dma40*
1016F: drivers/mfd/ab3550*
1017F: drivers/mfd/abx500*
1018F: drivers/mfd/ab8500*
1019F: drivers/mfd/stmpe*
1020F: drivers/rtc/rtc-ab8500.c
1021T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
1007 1022
1008ARM/VFP SUPPORT 1023ARM/VFP SUPPORT
1009M: Russell King <linux@arm.linux.org.uk> 1024M: Russell King <linux@arm.linux.org.uk>
@@ -2797,6 +2812,10 @@ M: Thomas Gleixner <tglx@linutronix.de>
2797S: Maintained 2812S: Maintained
2798F: Documentation/timers/ 2813F: Documentation/timers/
2799F: kernel/hrtimer.c 2814F: kernel/hrtimer.c
2815F: kernel/time/clockevents.c
2816F: kernel/time/tick*.*
2817F: kernel/time/timer_*.c
2818F: include/linux/clockevents.h
2800F: include/linux/hrtimer.h 2819F: include/linux/hrtimer.h
2801 2820
2802HIGH-SPEED SCC DRIVER FOR AX.25 2821HIGH-SPEED SCC DRIVER FOR AX.25
@@ -4590,7 +4609,7 @@ F: drivers/pcmcia/
4590F: include/pcmcia/ 4609F: include/pcmcia/
4591 4610
4592PCNET32 NETWORK DRIVER 4611PCNET32 NETWORK DRIVER
4593M: Don Fry <pcnet32@verizon.net> 4612M: Don Fry <pcnet32@frontier.com>
4594L: netdev@vger.kernel.org 4613L: netdev@vger.kernel.org
4595S: Maintained 4614S: Maintained
4596F: drivers/net/pcnet32.c 4615F: drivers/net/pcnet32.c
@@ -4612,7 +4631,7 @@ PERFORMANCE EVENTS SUBSYSTEM
4612M: Peter Zijlstra <a.p.zijlstra@chello.nl> 4631M: Peter Zijlstra <a.p.zijlstra@chello.nl>
4613M: Paul Mackerras <paulus@samba.org> 4632M: Paul Mackerras <paulus@samba.org>
4614M: Ingo Molnar <mingo@elte.hu> 4633M: Ingo Molnar <mingo@elte.hu>
4615M: Arnaldo Carvalho de Melo <acme@redhat.com> 4634M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
4616S: Supported 4635S: Supported
4617F: kernel/perf_event*.c 4636F: kernel/perf_event*.c
4618F: include/linux/perf_event.h 4637F: include/linux/perf_event.h
@@ -5127,6 +5146,18 @@ L: alsa-devel@alsa-project.org (moderated for non-subscribers)
5127S: Supported 5146S: Supported
5128F: sound/soc/s3c24xx 5147F: sound/soc/s3c24xx
5129 5148
5149TIMEKEEPING, NTP
5150M: John Stultz <johnstul@us.ibm.com>
5151M: Thomas Gleixner <tglx@linutronix.de>
5152S: Supported
5153F: include/linux/clocksource.h
5154F: include/linux/time.h
5155F: include/linux/timex.h
5156F: include/linux/timekeeping.h
5157F: kernel/time/clocksource.c
5158F: kernel/time/time*.c
5159F: kernel/time/ntp.c
5160
5130TLG2300 VIDEO4LINUX-2 DRIVER 5161TLG2300 VIDEO4LINUX-2 DRIVER
5131M: Huang Shijie <shijie8@gmail.com> 5162M: Huang Shijie <shijie8@gmail.com>
5132M: Kang Yong <kangyong@telegent.com> 5163M: Kang Yong <kangyong@telegent.com>
diff --git a/Makefile b/Makefile
index 77044b7918a5..74b25559f831 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 37 3SUBLEVEL = 37
4EXTRAVERSION = -rc7 4EXTRAVERSION =
5NAME = Flesh-Eating Bats with Fangs 5NAME = Flesh-Eating Bats with Fangs
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index 8bf0fa652eb6..f78c2be4242b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI
175config HAVE_ARCH_JUMP_LABEL 175config HAVE_ARCH_JUMP_LABEL
176 bool 176 bool
177 177
178config HAVE_ARCH_MUTEX_CPU_RELAX
179 bool
180
178source "kernel/gcov/Kconfig" 181source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h
index fe792ca818f6..5996e7a6757e 100644
--- a/arch/alpha/include/asm/perf_event.h
+++ b/arch/alpha/include/asm/perf_event.h
@@ -1,10 +1,4 @@
1#ifndef __ASM_ALPHA_PERF_EVENT_H 1#ifndef __ASM_ALPHA_PERF_EVENT_H
2#define __ASM_ALPHA_PERF_EVENT_H 2#define __ASM_ALPHA_PERF_EVENT_H
3 3
4#ifdef CONFIG_PERF_EVENTS
5extern void init_hw_perf_events(void);
6#else
7static inline void init_hw_perf_events(void) { }
8#endif
9
10#endif /* __ASM_ALPHA_PERF_EVENT_H */ 4#endif /* __ASM_ALPHA_PERF_EVENT_H */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 5f77afb88e89..4c8bb374eb0a 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -112,8 +112,6 @@ init_IRQ(void)
112 wrent(entInt, 0); 112 wrent(entInt, 0);
113 113
114 alpha_mv.init_irq(); 114 alpha_mv.init_irq();
115
116 init_hw_perf_events();
117} 115}
118 116
119/* 117/*
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 1cc49683fb69..90561c45e7d8 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -14,6 +14,7 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/kdebug.h> 15#include <linux/kdebug.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/init.h>
17 18
18#include <asm/hwrpb.h> 19#include <asm/hwrpb.h>
19#include <asm/atomic.h> 20#include <asm/atomic.h>
@@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
863/* 864/*
864 * Init call to initialise performance events at kernel startup. 865 * Init call to initialise performance events at kernel startup.
865 */ 866 */
866void __init init_hw_perf_events(void) 867int __init init_hw_perf_events(void)
867{ 868{
868 pr_info("Performance events: "); 869 pr_info("Performance events: ");
869 870
870 if (!supported_cpu()) { 871 if (!supported_cpu()) {
871 pr_cont("No support for your CPU.\n"); 872 pr_cont("No support for your CPU.\n");
872 return; 873 return 0;
873 } 874 }
874 875
875 pr_cont("Supported CPU type!\n"); 876 pr_cont("Supported CPU type!\n");
@@ -881,6 +882,8 @@ void __init init_hw_perf_events(void)
881 /* And set up PMU specification */ 882 /* And set up PMU specification */
882 alpha_pmu = &ev67_pmu; 883 alpha_pmu = &ev67_pmu;
883 884
884 perf_pmu_register(&pmu); 885 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
885}
886 886
887 return 0;
888}
889early_initcall(init_hw_perf_events);
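The converted init path has the same shape on each architecture touched
by this patch (sketch; the support check and pmu object are per-arch):

	static int __init init_hw_perf_events(void)
	{
		if (!supported_cpu())
			return 0;	/* nothing to register */

		/* register the CPU PMU for raw hardware events */
		perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
		return 0;
	}
	early_initcall(init_hw_perf_events);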
diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c
index 1bec96e85196..42ff90b46dfb 100644
--- a/arch/arm/common/it8152.c
+++ b/arch/arm/common/it8152.c
@@ -352,3 +352,4 @@ struct pci_bus * __init it8152_pci_scan_bus(int nr, struct pci_sys_data *sys)
352 return pci_scan_bus(nr, &it8152_ops, sys); 352 return pci_scan_bus(nr, &it8152_ops, sys);
353} 353}
354 354
355EXPORT_SYMBOL(dma_set_coherent_mask);
diff --git a/arch/arm/include/asm/hardware/it8152.h b/arch/arm/include/asm/hardware/it8152.h
index 21fa272301f8..b2f95c72287c 100644
--- a/arch/arm/include/asm/hardware/it8152.h
+++ b/arch/arm/include/asm/hardware/it8152.h
@@ -76,6 +76,7 @@ extern unsigned long it8152_base_address;
76 IT8152_PD_IRQ(0) Audio controller (ACR) 76 IT8152_PD_IRQ(0) Audio controller (ACR)
77 */ 77 */
78#define IT8152_IRQ(x) (IRQ_BOARD_START + (x)) 78#define IT8152_IRQ(x) (IRQ_BOARD_START + (x))
79#define IT8152_LAST_IRQ (IRQ_BOARD_START + 40)
79 80
80/* IRQ-sources in 3 groups - local devices, LPC (serial), and external PCI */ 81/* IRQ-sources in 3 groups - local devices, LPC (serial), and external PCI */
81#define IT8152_LD_IRQ_COUNT 9 82#define IT8152_LD_IRQ_COUNT 9
diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h
index 1fc684e70ab6..7080e2c8fa62 100644
--- a/arch/arm/include/asm/highmem.h
+++ b/arch/arm/include/asm/highmem.h
@@ -25,9 +25,6 @@ extern void *kmap_high(struct page *page);
25extern void *kmap_high_get(struct page *page); 25extern void *kmap_high_get(struct page *page);
26extern void kunmap_high(struct page *page); 26extern void kunmap_high(struct page *page);
27 27
28extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte);
29extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte);
30
31/* 28/*
32 * The following functions are already defined by <linux/highmem.h> 29 * The following functions are already defined by <linux/highmem.h>
33 * when CONFIG_HIGHMEM is not set. 30 * when CONFIG_HIGHMEM is not set.
diff --git a/arch/arm/include/asm/sizes.h b/arch/arm/include/asm/sizes.h
index 4fc1565e4f93..316bb2b2be3d 100644
--- a/arch/arm/include/asm/sizes.h
+++ b/arch/arm/include/asm/sizes.h
@@ -13,9 +13,6 @@
13 * along with this program; if not, write to the Free Software 13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 */ 15 */
16/* DO NOT EDIT!! - this file automatically generated
17 * from .s file by awk -f s2h.awk
18 */
19/* Size definitions 16/* Size definitions
20 * Copyright (C) ARM Limited 1998. All rights reserved. 17 * Copyright (C) ARM Limited 1998. All rights reserved.
21 */ 18 */
@@ -25,6 +22,9 @@
25 22
26/* handy sizes */ 23/* handy sizes */
27#define SZ_16 0x00000010 24#define SZ_16 0x00000010
25#define SZ_32 0x00000020
26#define SZ_64 0x00000040
27#define SZ_128 0x00000080
28#define SZ_256 0x00000100 28#define SZ_256 0x00000100
29#define SZ_512 0x00000200 29#define SZ_512 0x00000200
30 30
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 1120f18a6b17..80025948b8ad 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -150,6 +150,7 @@ extern unsigned int user_debug;
150#define rmb() dmb() 150#define rmb() dmb()
151#define wmb() mb() 151#define wmb() mb()
152#else 152#else
153#include <asm/memory.h>
153#define mb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) 154#define mb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
154#define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) 155#define rmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
155#define wmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0) 156#define wmb() do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 8bfa98757cd2..80bf8cd88d7c 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -29,6 +29,9 @@ ret_fast_syscall:
29 ldr r1, [tsk, #TI_FLAGS] 29 ldr r1, [tsk, #TI_FLAGS]
30 tst r1, #_TIF_WORK_MASK 30 tst r1, #_TIF_WORK_MASK
31 bne fast_work_pending 31 bne fast_work_pending
32#if defined(CONFIG_IRQSOFF_TRACER)
33 asm_trace_hardirqs_on
34#endif
32 35
33 /* perform architecture specific actions before user return */ 36 /* perform architecture specific actions before user return */
34 arch_ret_to_user r1, lr 37 arch_ret_to_user r1, lr
@@ -65,6 +68,9 @@ ret_slow_syscall:
65 tst r1, #_TIF_WORK_MASK 68 tst r1, #_TIF_WORK_MASK
66 bne work_pending 69 bne work_pending
67no_work_pending: 70no_work_pending:
71#if defined(CONFIG_IRQSOFF_TRACER)
72 asm_trace_hardirqs_on
73#endif
68 /* perform architecture specific actions before user return */ 74 /* perform architecture specific actions before user return */
69 arch_ret_to_user r1, lr 75 arch_ret_to_user r1, lr
70 76
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a50357492a..fdfa4976b0bf 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -3034,11 +3034,11 @@ init_hw_perf_events(void)
3034 pr_info("no hardware support available\n"); 3034 pr_info("no hardware support available\n");
3035 } 3035 }
3036 3036
3037 perf_pmu_register(&pmu); 3037 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
3038 3038
3039 return 0; 3039 return 0;
3040} 3040}
3041arch_initcall(init_hw_perf_events); 3041early_initcall(init_hw_perf_events);
3042 3042
3043/* 3043/*
3044 * Callchain handling code. 3044 * Callchain handling code.
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 8c1959590252..9066473c0ebc 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -310,7 +310,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
310 * All kernel threads share the same mm context; grab a 310 * All kernel threads share the same mm context; grab a
311 * reference and switch to it. 311 * reference and switch to it.
312 */ 312 */
313 atomic_inc(&mm->mm_users);
314 atomic_inc(&mm->mm_count); 313 atomic_inc(&mm->mm_count);
315 current->active_mm = mm; 314 current->active_mm = mm;
316 cpumask_set_cpu(cpu, mm_cpumask(mm)); 315 cpumask_set_cpu(cpu, mm_cpumask(mm));
diff --git a/arch/arm/mach-at91/include/mach/at91_mci.h b/arch/arm/mach-at91/include/mach/at91_mci.h
index 57f8ee154943..27ac6f550fe3 100644
--- a/arch/arm/mach-at91/include/mach/at91_mci.h
+++ b/arch/arm/mach-at91/include/mach/at91_mci.h
@@ -74,6 +74,8 @@
74#define AT91_MCI_TRTYP_BLOCK (0 << 19) 74#define AT91_MCI_TRTYP_BLOCK (0 << 19)
75#define AT91_MCI_TRTYP_MULTIPLE (1 << 19) 75#define AT91_MCI_TRTYP_MULTIPLE (1 << 19)
76#define AT91_MCI_TRTYP_STREAM (2 << 19) 76#define AT91_MCI_TRTYP_STREAM (2 << 19)
77#define AT91_MCI_TRTYP_SDIO_BYTE (4 << 19)
78#define AT91_MCI_TRTYP_SDIO_BLOCK (5 << 19)
77 79
78#define AT91_MCI_BLKR 0x18 /* Block Register */ 80#define AT91_MCI_BLKR 0x18 /* Block Register */
79#define AT91_MCI_BLKR_BCNT(n) ((0xffff & (n)) << 0) /* Block count */ 81#define AT91_MCI_BLKR_BCNT(n) ((0xffff & (n)) << 0) /* Block count */
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index 24498a932ba6..a54b3db80366 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -513,4 +513,4 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
513 513
514EXPORT_SYMBOL(ixp4xx_pci_read); 514EXPORT_SYMBOL(ixp4xx_pci_read);
515EXPORT_SYMBOL(ixp4xx_pci_write); 515EXPORT_SYMBOL(ixp4xx_pci_write);
516 516EXPORT_SYMBOL(dma_set_coherent_mask);
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index dd235ecc9d6c..c93e73d54dd1 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -540,6 +540,7 @@ config MACH_ICONTROL
540config ARCH_PXA_ESERIES 540config ARCH_PXA_ESERIES
541 bool "PXA based Toshiba e-series PDAs" 541 bool "PXA based Toshiba e-series PDAs"
542 select PXA25x 542 select PXA25x
543 select FB_W100
543 544
544config MACH_E330 545config MACH_E330
545 bool "Toshiba e330" 546 bool "Toshiba e330"
diff --git a/arch/arm/mach-pxa/sleep.S b/arch/arm/mach-pxa/sleep.S
index 52c30b01a671..ae008110db4e 100644
--- a/arch/arm/mach-pxa/sleep.S
+++ b/arch/arm/mach-pxa/sleep.S
@@ -353,8 +353,8 @@ resume_turn_on_mmu:
353 353
354 @ Let us ensure we jump to resume_after_mmu only when the mcr above 354 @ Let us ensure we jump to resume_after_mmu only when the mcr above
355 @ actually took effect. They call it the "cpwait" operation. 355 @ actually took effect. They call it the "cpwait" operation.
356 mrc p15, 0, r1, c2, c0, 0 @ queue a dependency on CP15 356 mrc p15, 0, r0, c2, c0, 0 @ queue a dependency on CP15
357 sub pc, r2, r1, lsr #32 @ jump to virtual addr 357 sub pc, r2, r0, lsr #32 @ jump to virtual addr
358 nop 358 nop
359 nop 359 nop
360 nop 360 nop
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 6e77c042d8e9..e0b0e7a4ec68 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -13,13 +13,9 @@
13 */ 13 */
14 14
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/highmem.h>
16#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
17#include <asm/kmap_types.h>
18#include <asm/fixmap.h>
19#include <asm/pgtable.h>
20#include <asm/tlbflush.h>
21#include <plat/cache-feroceon-l2.h> 18#include <plat/cache-feroceon-l2.h>
22#include "mm.h"
23 19
24/* 20/*
25 * Low-level cache maintenance operations. 21 * Low-level cache maintenance operations.
@@ -39,27 +35,30 @@
39 * between which we don't want to be preempted. 35 * between which we don't want to be preempted.
40 */ 36 */
41 37
42static inline unsigned long l2_start_va(unsigned long paddr) 38static inline unsigned long l2_get_va(unsigned long paddr)
43{ 39{
44#ifdef CONFIG_HIGHMEM 40#ifdef CONFIG_HIGHMEM
45 /* 41 /*
46 * Let's do our own fixmap stuff in a minimal way here.
47 * Because range ops can't be done on physical addresses, 42 * Because range ops can't be done on physical addresses,
48 * we simply install a virtual mapping for it only for the 43 * we simply install a virtual mapping for it only for the
49 * TLB lookup to occur, hence no need to flush the untouched 44 * TLB lookup to occur, hence no need to flush the untouched
50 * memory mapping. This is protected with the disabling of 45 * memory mapping afterwards (note: a cache flush may happen
51 * interrupts by the caller. 46 * in some circumstances depending on the path taken in kunmap_atomic).
52 */ 47 */
53 unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id(); 48 void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT);
54 unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 49 return (unsigned long)vaddr + (paddr & ~PAGE_MASK);
55 set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0);
56 local_flush_tlb_kernel_page(vaddr);
57 return vaddr + (paddr & ~PAGE_MASK);
58#else 50#else
59 return __phys_to_virt(paddr); 51 return __phys_to_virt(paddr);
60#endif 52#endif
61} 53}
62 54
55static inline void l2_put_va(unsigned long vaddr)
56{
57#ifdef CONFIG_HIGHMEM
58 kunmap_atomic((void *)vaddr);
59#endif
60}
61
63static inline void l2_clean_pa(unsigned long addr) 62static inline void l2_clean_pa(unsigned long addr)
64{ 63{
65 __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); 64 __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
@@ -76,13 +75,14 @@ static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
76 */ 75 */
77 BUG_ON((start ^ end) >> PAGE_SHIFT); 76 BUG_ON((start ^ end) >> PAGE_SHIFT);
78 77
79 raw_local_irq_save(flags); 78 va_start = l2_get_va(start);
80 va_start = l2_start_va(start);
81 va_end = va_start + (end - start); 79 va_end = va_start + (end - start);
80 raw_local_irq_save(flags);
82 __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" 81 __asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
83 "mcr p15, 1, %1, c15, c9, 5" 82 "mcr p15, 1, %1, c15, c9, 5"
84 : : "r" (va_start), "r" (va_end)); 83 : : "r" (va_start), "r" (va_end));
85 raw_local_irq_restore(flags); 84 raw_local_irq_restore(flags);
85 l2_put_va(va_start);
86} 86}
87 87
88static inline void l2_clean_inv_pa(unsigned long addr) 88static inline void l2_clean_inv_pa(unsigned long addr)
@@ -106,13 +106,14 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
106 */ 106 */
107 BUG_ON((start ^ end) >> PAGE_SHIFT); 107 BUG_ON((start ^ end) >> PAGE_SHIFT);
108 108
109 raw_local_irq_save(flags); 109 va_start = l2_get_va(start);
110 va_start = l2_start_va(start);
111 va_end = va_start + (end - start); 110 va_end = va_start + (end - start);
111 raw_local_irq_save(flags);
112 __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" 112 __asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
113 "mcr p15, 1, %1, c15, c11, 5" 113 "mcr p15, 1, %1, c15, c11, 5"
114 : : "r" (va_start), "r" (va_end)); 114 : : "r" (va_start), "r" (va_end));
115 raw_local_irq_restore(flags); 115 raw_local_irq_restore(flags);
116 l2_put_va(va_start);
116} 117}
117 118
118static inline void l2_inv_all(void) 119static inline void l2_inv_all(void)
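Isolated from the surrounding cache maintenance, the highmem mapping
pattern this change (and the xsc3l2 one below) adopts is simply the
following (do_range_op() stands in for the actual mcr sequence):

	static void l2_op_on_pa(unsigned long paddr, size_t len)
	{
		void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT);
		unsigned long va = (unsigned long)vaddr + (paddr & ~PAGE_MASK);

		do_range_op(va, va + len);	/* hypothetical range op */
		kunmap_atomic(vaddr);		/* drop the temporary mapping */
	}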
diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
index c3154928bccd..5a32020471e3 100644
--- a/arch/arm/mm/cache-xsc3l2.c
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -17,14 +17,10 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/highmem.h>
20#include <asm/system.h> 21#include <asm/system.h>
21#include <asm/cputype.h> 22#include <asm/cputype.h>
22#include <asm/cacheflush.h> 23#include <asm/cacheflush.h>
23#include <asm/kmap_types.h>
24#include <asm/fixmap.h>
25#include <asm/pgtable.h>
26#include <asm/tlbflush.h>
27#include "mm.h"
28 24
29#define CR_L2 (1 << 26) 25#define CR_L2 (1 << 26)
30 26
@@ -71,16 +67,15 @@ static inline void xsc3_l2_inv_all(void)
71 dsb(); 67 dsb();
72} 68}
73 69
70static inline void l2_unmap_va(unsigned long va)
71{
74#ifdef CONFIG_HIGHMEM 72#ifdef CONFIG_HIGHMEM
75#define l2_map_save_flags(x) raw_local_save_flags(x) 73 if (va != -1)
76#define l2_map_restore_flags(x) raw_local_irq_restore(x) 74 kunmap_atomic((void *)va);
77#else
78#define l2_map_save_flags(x) ((x) = 0)
79#define l2_map_restore_flags(x) ((void)(x))
80#endif 75#endif
76}
81 77
82static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va, 78static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va)
83 unsigned long flags)
84{ 79{
85#ifdef CONFIG_HIGHMEM 80#ifdef CONFIG_HIGHMEM
86 unsigned long va = prev_va & PAGE_MASK; 81 unsigned long va = prev_va & PAGE_MASK;
@@ -89,17 +84,10 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
89 /* 84 /*
90 * Switching to a new page. Because cache ops are 85 * Switching to a new page. Because cache ops are
91 * using virtual addresses only, we must put a mapping 86 * using virtual addresses only, we must put a mapping
92 * in place for it. We also enable interrupts for a 87 * in place for it.
93 * short while and disable them again to protect this
94 * mapping.
95 */ 88 */
96 unsigned long idx; 89 l2_unmap_va(prev_va);
97 raw_local_irq_restore(flags); 90 va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT);
98 idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
99 va = __fix_to_virt(FIX_KMAP_BEGIN + idx);
100 raw_local_irq_restore(flags | PSR_I_BIT);
101 set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0);
102 local_flush_tlb_kernel_page(va);
103 } 91 }
104 return va + (pa_offset >> (32 - PAGE_SHIFT)); 92 return va + (pa_offset >> (32 - PAGE_SHIFT));
105#else 93#else
@@ -109,7 +97,7 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
109 97
110static void xsc3_l2_inv_range(unsigned long start, unsigned long end) 98static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
111{ 99{
112 unsigned long vaddr, flags; 100 unsigned long vaddr;
113 101
114 if (start == 0 && end == -1ul) { 102 if (start == 0 && end == -1ul) {
115 xsc3_l2_inv_all(); 103 xsc3_l2_inv_all();
@@ -117,13 +105,12 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
117 } 105 }
118 106
119 vaddr = -1; /* to force the first mapping */ 107 vaddr = -1; /* to force the first mapping */
120 l2_map_save_flags(flags);
121 108
122 /* 109 /*
123 * Clean and invalidate partial first cache line. 110 * Clean and invalidate partial first cache line.
124 */ 111 */
125 if (start & (CACHE_LINE_SIZE - 1)) { 112 if (start & (CACHE_LINE_SIZE - 1)) {
126 vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags); 113 vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr);
127 xsc3_l2_clean_mva(vaddr); 114 xsc3_l2_clean_mva(vaddr);
128 xsc3_l2_inv_mva(vaddr); 115 xsc3_l2_inv_mva(vaddr);
129 start = (start | (CACHE_LINE_SIZE - 1)) + 1; 116 start = (start | (CACHE_LINE_SIZE - 1)) + 1;
@@ -133,7 +120,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
133 * Invalidate all full cache lines between 'start' and 'end'. 120 * Invalidate all full cache lines between 'start' and 'end'.
134 */ 121 */
135 while (start < (end & ~(CACHE_LINE_SIZE - 1))) { 122 while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
136 vaddr = l2_map_va(start, vaddr, flags); 123 vaddr = l2_map_va(start, vaddr);
137 xsc3_l2_inv_mva(vaddr); 124 xsc3_l2_inv_mva(vaddr);
138 start += CACHE_LINE_SIZE; 125 start += CACHE_LINE_SIZE;
139 } 126 }
@@ -142,31 +129,30 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
142 * Clean and invalidate partial last cache line. 129 * Clean and invalidate partial last cache line.
143 */ 130 */
144 if (start < end) { 131 if (start < end) {
145 vaddr = l2_map_va(start, vaddr, flags); 132 vaddr = l2_map_va(start, vaddr);
146 xsc3_l2_clean_mva(vaddr); 133 xsc3_l2_clean_mva(vaddr);
147 xsc3_l2_inv_mva(vaddr); 134 xsc3_l2_inv_mva(vaddr);
148 } 135 }
149 136
150 l2_map_restore_flags(flags); 137 l2_unmap_va(vaddr);
151 138
152 dsb(); 139 dsb();
153} 140}
154 141
155static void xsc3_l2_clean_range(unsigned long start, unsigned long end) 142static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
156{ 143{
157 unsigned long vaddr, flags; 144 unsigned long vaddr;
158 145
159 vaddr = -1; /* to force the first mapping */ 146 vaddr = -1; /* to force the first mapping */
160 l2_map_save_flags(flags);
161 147
162 start &= ~(CACHE_LINE_SIZE - 1); 148 start &= ~(CACHE_LINE_SIZE - 1);
163 while (start < end) { 149 while (start < end) {
164 vaddr = l2_map_va(start, vaddr, flags); 150 vaddr = l2_map_va(start, vaddr);
165 xsc3_l2_clean_mva(vaddr); 151 xsc3_l2_clean_mva(vaddr);
166 start += CACHE_LINE_SIZE; 152 start += CACHE_LINE_SIZE;
167 } 153 }
168 154
169 l2_map_restore_flags(flags); 155 l2_unmap_va(vaddr);
170 156
171 dsb(); 157 dsb();
172} 158}
@@ -193,7 +179,7 @@ static inline void xsc3_l2_flush_all(void)
193 179
194static void xsc3_l2_flush_range(unsigned long start, unsigned long end) 180static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
195{ 181{
196 unsigned long vaddr, flags; 182 unsigned long vaddr;
197 183
198 if (start == 0 && end == -1ul) { 184 if (start == 0 && end == -1ul) {
199 xsc3_l2_flush_all(); 185 xsc3_l2_flush_all();
@@ -201,17 +187,16 @@ static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
201 } 187 }
202 188
203 vaddr = -1; /* to force the first mapping */ 189 vaddr = -1; /* to force the first mapping */
204 l2_map_save_flags(flags);
205 190
206 start &= ~(CACHE_LINE_SIZE - 1); 191 start &= ~(CACHE_LINE_SIZE - 1);
207 while (start < end) { 192 while (start < end) {
208 vaddr = l2_map_va(start, vaddr, flags); 193 vaddr = l2_map_va(start, vaddr);
209 xsc3_l2_clean_mva(vaddr); 194 xsc3_l2_clean_mva(vaddr);
210 xsc3_l2_inv_mva(vaddr); 195 xsc3_l2_inv_mva(vaddr);
211 start += CACHE_LINE_SIZE; 196 start += CACHE_LINE_SIZE;
212 } 197 }
213 198
214 l2_map_restore_flags(flags); 199 l2_unmap_va(vaddr);
215 200
216 dsb(); 201 dsb();
217} 202}
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ac6a36142fcd..809f1bf9fa29 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -17,6 +17,7 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/device.h> 18#include <linux/device.h>
19#include <linux/dma-mapping.h> 19#include <linux/dma-mapping.h>
20#include <linux/highmem.h>
20 21
21#include <asm/memory.h> 22#include <asm/memory.h>
22#include <asm/highmem.h> 23#include <asm/highmem.h>
@@ -480,10 +481,10 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset,
480 op(vaddr, len, dir); 481 op(vaddr, len, dir);
481 kunmap_high(page); 482 kunmap_high(page);
482 } else if (cache_is_vipt()) { 483 } else if (cache_is_vipt()) {
483 pte_t saved_pte; 484 /* unmapped pages might still be cached */
484 vaddr = kmap_high_l1_vipt(page, &saved_pte); 485 vaddr = kmap_atomic(page);
485 op(vaddr + offset, len, dir); 486 op(vaddr + offset, len, dir);
486 kunmap_high_l1_vipt(page, saved_pte); 487 kunmap_atomic(vaddr);
487 } 488 }
488 } else { 489 } else {
489 vaddr = page_address(page) + offset; 490 vaddr = page_address(page) + offset;
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 391ffae75098..c29f2839f1d2 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -10,6 +10,7 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/highmem.h>
13 14
14#include <asm/cacheflush.h> 15#include <asm/cacheflush.h>
15#include <asm/cachetype.h> 16#include <asm/cachetype.h>
@@ -180,10 +181,10 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
180 __cpuc_flush_dcache_area(addr, PAGE_SIZE); 181 __cpuc_flush_dcache_area(addr, PAGE_SIZE);
181 kunmap_high(page); 182 kunmap_high(page);
182 } else if (cache_is_vipt()) { 183 } else if (cache_is_vipt()) {
183 pte_t saved_pte; 184 /* unmapped pages might still be cached */
184 addr = kmap_high_l1_vipt(page, &saved_pte); 185 addr = kmap_atomic(page);
185 __cpuc_flush_dcache_area(addr, PAGE_SIZE); 186 __cpuc_flush_dcache_area(addr, PAGE_SIZE);
186 kunmap_high_l1_vipt(page, saved_pte); 187 kunmap_atomic(addr);
187 } 188 }
188 } 189 }
189 190
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index c435fd9e1da9..807c0573abbe 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -140,90 +140,3 @@ struct page *kmap_atomic_to_page(const void *ptr)
140 pte = TOP_PTE(vaddr); 140 pte = TOP_PTE(vaddr);
141 return pte_page(*pte); 141 return pte_page(*pte);
142} 142}
143
144#ifdef CONFIG_CPU_CACHE_VIPT
145
146#include <linux/percpu.h>
147
148/*
149 * The VIVT cache of a highmem page is always flushed before the page
150 * is unmapped. Hence unmapped highmem pages need no cache maintenance
151 * in that case.
152 *
153 * However unmapped pages may still be cached with a VIPT cache, and
154 * it is not possible to perform cache maintenance on them using physical
155 * addresses unfortunately. So we have no choice but to set up a temporary
156 * virtual mapping for that purpose.
157 *
158 * Yet this VIPT cache maintenance may be triggered from DMA support
159 * functions which are possibly called from interrupt context. As we don't
160 * want to keep interrupt disabled all the time when such maintenance is
161 * taking place, we therefore allow for some reentrancy by preserving and
162 * restoring the previous fixmap entry before the interrupted context is
163 * resumed. If the reentrancy depth is 0 then there is no need to restore
164 * the previous fixmap, and leaving the current one in place allow it to
165 * be reused the next time without a TLB flush (common with DMA).
166 */
167
168static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth);
169
170void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte)
171{
172 unsigned int idx, cpu;
173 int *depth;
174 unsigned long vaddr, flags;
175 pte_t pte, *ptep;
176
177 if (!in_interrupt())
178 preempt_disable();
179
180 cpu = smp_processor_id();
181 depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
182
183 idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
184 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
185 ptep = TOP_PTE(vaddr);
186 pte = mk_pte(page, kmap_prot);
187
188 raw_local_irq_save(flags);
189 (*depth)++;
190 if (pte_val(*ptep) == pte_val(pte)) {
191 *saved_pte = pte;
192 } else {
193 *saved_pte = *ptep;
194 set_pte_ext(ptep, pte, 0);
195 local_flush_tlb_kernel_page(vaddr);
196 }
197 raw_local_irq_restore(flags);
198
199 return (void *)vaddr;
200}
201
202void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte)
203{
204 unsigned int idx, cpu = smp_processor_id();
205 int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
206 unsigned long vaddr, flags;
207 pte_t pte, *ptep;
208
209 idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
210 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
211 ptep = TOP_PTE(vaddr);
212 pte = mk_pte(page, kmap_prot);
213
214 BUG_ON(pte_val(*ptep) != pte_val(pte));
215 BUG_ON(*depth <= 0);
216
217 raw_local_irq_save(flags);
218 (*depth)--;
219 if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) {
220 set_pte_ext(ptep, saved_pte, 0);
221 local_flush_tlb_kernel_page(vaddr);
222 }
223 raw_local_irq_restore(flags);
224
225 if (!in_interrupt())
226 preempt_enable();
227}
228
229#endif /* CONFIG_CPU_CACHE_VIPT */
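The removal above (and the kmap_atomic() substitutions in dma-mapping.c
and flush.c) leans on the generic atomic kmaps having become stack-based
in this release, so they nest safely across interrupts; the custom
per-CPU depth/saved-pte bookkeeping reduces to a plain (sketch):

	void *addr = kmap_atomic(page);		/* nests, IRQ-safe */
	__cpuc_flush_dcache_area(addr, PAGE_SIZE);
	kunmap_atomic(addr);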
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 5c7c6fc07565..183e0d226669 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -1047,6 +1047,6 @@ init_hw_perf_events(void)
1047 1047
1048 return 0; 1048 return 0;
1049} 1049}
1050arch_initcall(init_hw_perf_events); 1050early_initcall(init_hw_perf_events);
1051 1051
1052#endif /* defined(CONFIG_CPU_MIPS32)... */ 1052#endif /* defined(CONFIG_CPU_MIPS32)... */
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index c2e44597c22b..ac11754ecec5 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -459,7 +459,7 @@ void migrate_irqs(void)
459 tmp = CROSS_GxICR(irq, new); 459 tmp = CROSS_GxICR(irq, new);
460 460
461 x &= GxICR_LEVEL | GxICR_ENABLE; 461 x &= GxICR_LEVEL | GxICR_ENABLE;
462 if (GxICR(irq) & GxICR_REQUEST) { 462 if (GxICR(irq) & GxICR_REQUEST)
463 x |= GxICR_REQUEST | GxICR_DETECT; 463 x |= GxICR_REQUEST | GxICR_DETECT;
464 CROSS_GxICR(irq, new) = x; 464 CROSS_GxICR(irq, new) = x;
465 tmp = CROSS_GxICR(irq, new); 465 tmp = CROSS_GxICR(irq, new);
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
index 7c07de0d8943..b150b510510f 100644
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -126,4 +126,4 @@ static int init_e500_pmu(void)
126 return register_fsl_emb_pmu(&e500_pmu); 126 return register_fsl_emb_pmu(&e500_pmu);
127} 127}
128 128
129arch_initcall(init_e500_pmu); 129early_initcall(init_e500_pmu);
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index 09d72028f317..2cc5e0301d0b 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void)
414 return register_power_pmu(&mpc7450_pmu); 414 return register_power_pmu(&mpc7450_pmu);
415} 415}
416 416
417arch_initcall(init_mpc7450_pmu); 417early_initcall(init_mpc7450_pmu);
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 3129c855933c..567480705789 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
1379 freeze_events_kernel = MMCR0_FCHV; 1379 freeze_events_kernel = MMCR0_FCHV;
1380#endif /* CONFIG_PPC64 */ 1380#endif /* CONFIG_PPC64 */
1381 1381
1382 perf_pmu_register(&power_pmu); 1382 perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
1383 perf_cpu_notifier(power_pmu_notifier); 1383 perf_cpu_notifier(power_pmu_notifier);
1384 1384
1385 return 0; 1385 return 0;
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index 7ecca59ddf77..4dcf5f831e9d 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
681 pr_info("%s performance monitor hardware support registered\n", 681 pr_info("%s performance monitor hardware support registered\n",
682 pmu->name); 682 pmu->name);
683 683
684 perf_pmu_register(&fsl_emb_pmu); 684 perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
685 685
686 return 0; 686 return 0;
687} 687}
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 2a361cdda635..ead8b3c2649e 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -613,4 +613,4 @@ static int init_power4_pmu(void)
613 return register_power_pmu(&power4_pmu); 613 return register_power_pmu(&power4_pmu);
614} 614}
615 615
616arch_initcall(init_power4_pmu); 616early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 199de527d411..eca0ac595cb6 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -682,4 +682,4 @@ static int init_power5p_pmu(void)
682 return register_power_pmu(&power5p_pmu); 682 return register_power_pmu(&power5p_pmu);
683} 683}
684 684
685arch_initcall(init_power5p_pmu); 685early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index 98b6a729a9dd..d5ff0f64a5e6 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -621,4 +621,4 @@ static int init_power5_pmu(void)
621 return register_power_pmu(&power5_pmu); 621 return register_power_pmu(&power5_pmu);
622} 622}
623 623
624arch_initcall(init_power5_pmu); 624early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 84a607bda8fb..31603927e376 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -544,4 +544,4 @@ static int init_power6_pmu(void)
544 return register_power_pmu(&power6_pmu); 544 return register_power_pmu(&power6_pmu);
545} 545}
546 546
547arch_initcall(init_power6_pmu); 547early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 852f7b7f6b40..593740fcb799 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -369,4 +369,4 @@ static int init_power7_pmu(void)
369 return register_power_pmu(&power7_pmu); 369 return register_power_pmu(&power7_pmu);
370} 370}
371 371
372arch_initcall(init_power7_pmu); 372early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 3fee685de4df..9a6e093858fe 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -494,4 +494,4 @@ static int init_ppc970_pmu(void)
494 return register_power_pmu(&ppc970_pmu); 494 return register_power_pmu(&ppc970_pmu);
495} 495}
496 496
497arch_initcall(init_ppc970_pmu); 497early_initcall(init_ppc970_pmu);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index fea833e18ad5..e0d703c7fdf7 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -63,6 +63,7 @@
63#include <linux/of_gpio.h> 63#include <linux/of_gpio.h>
64#include <linux/kernel.h> 64#include <linux/kernel.h>
65#include <linux/slab.h> 65#include <linux/slab.h>
66#include <linux/fs.h>
66#include <linux/watchdog.h> 67#include <linux/watchdog.h>
67#include <linux/miscdevice.h> 68#include <linux/miscdevice.h>
68#include <linux/uaccess.h> 69#include <linux/uaccess.h>
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e0b98e71ff47..6c6d7b339aae 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -99,6 +99,7 @@ config S390
99 select HAVE_KERNEL_LZMA 99 select HAVE_KERNEL_LZMA
100 select HAVE_KERNEL_LZO 100 select HAVE_KERNEL_LZO
101 select HAVE_GET_USER_PAGES_FAST 101 select HAVE_GET_USER_PAGES_FAST
102 select HAVE_ARCH_MUTEX_CPU_RELAX
102 select ARCH_INLINE_SPIN_TRYLOCK 103 select ARCH_INLINE_SPIN_TRYLOCK
103 select ARCH_INLINE_SPIN_TRYLOCK_BH 104 select ARCH_INLINE_SPIN_TRYLOCK_BH
104 select ARCH_INLINE_SPIN_LOCK 105 select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
index 458c1f7fbc18..688271f5f2e4 100644
--- a/arch/s390/include/asm/mutex.h
+++ b/arch/s390/include/asm/mutex.h
@@ -7,3 +7,5 @@
7 */ 7 */
8 8
9#include <asm-generic/mutex-dec.h> 9#include <asm-generic/mutex-dec.h>
10
11#define arch_mutex_cpu_relax() barrier()
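
[This define pairs with the HAVE_ARCH_MUTEX_CPU_RELAX select added to arch/s390/Kconfig above: the architecture opts out of the generic relax primitive. A sketch of the presumed common-code fallback that the select bypasses (assumption; not part of this diff):

    #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
    #define arch_mutex_cpu_relax()  cpu_relax()
    #endif

On s390, cpu_relax() can yield the CPU to the hypervisor, which is likely too expensive inside the adaptive mutex spin loop; a plain barrier() keeps the loop cheap.]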
diff --git a/arch/sh/boards/mach-se/7206/irq.c b/arch/sh/boards/mach-se/7206/irq.c
index d961949600fd..9070d7e60704 100644
--- a/arch/sh/boards/mach-se/7206/irq.c
+++ b/arch/sh/boards/mach-se/7206/irq.c
@@ -140,7 +140,7 @@ void __init init_se7206_IRQ(void)
140 make_se7206_irq(IRQ1_IRQ); /* ATA */ 140 make_se7206_irq(IRQ1_IRQ); /* ATA */
141 make_se7206_irq(IRQ3_IRQ); /* SLOT / PCM */ 141 make_se7206_irq(IRQ3_IRQ); /* SLOT / PCM */
142 142
143 __raw_writew(__raw_readw(INTC_ICR1) | 0x000b, INTC_ICR); /* ICR1 */ 143 __raw_writew(__raw_readw(INTC_ICR1) | 0x000b, INTC_ICR1); /* ICR1 */
144 144
145 /* FPGA System register setup*/ 145 /* FPGA System register setup*/
146 __raw_writew(0x0000,INTSTS0); /* Clear INTSTS0 */ 146 __raw_writew(0x0000,INTSTS0); /* Clear INTSTS0 */
diff --git a/arch/sh/kernel/cpu/sh2a/clock-sh7201.c b/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
index b26264dc2aef..c509c40cba4b 100644
--- a/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/clock-sh7201.c
@@ -34,7 +34,7 @@ static const int pfc_divisors[]={1,2,3,4,6,8,12};
34 34
35static void master_clk_init(struct clk *clk) 35static void master_clk_init(struct clk *clk)
36{ 36{
37 return 10000000 * PLL2 * pll1rate[(__raw_readw(FREQCR) >> 8) & 0x0007]; 37 clk->rate = 10000000 * PLL2 * pll1rate[(__raw_readw(FREQCR) >> 8) & 0x0007];
38} 38}
39 39
40static struct clk_ops sh7201_master_clk_ops = { 40static struct clk_ops sh7201_master_clk_ops = {
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
index b601fa3978d1..6282a839e08e 100644
--- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c
@@ -81,8 +81,7 @@ static void shoc_clk_init(struct clk *clk)
81 for (i = 0; i < ARRAY_SIZE(frqcr3_divisors); i++) { 81 for (i = 0; i < ARRAY_SIZE(frqcr3_divisors); i++) {
82 int divisor = frqcr3_divisors[i]; 82 int divisor = frqcr3_divisors[i];
83 83
84 if (clk->ops->set_rate(clk, clk->parent->rate / 84 if (clk->ops->set_rate(clk, clk->parent->rate / divisor) == 0)
85 divisor, 0) == 0)
86 break; 85 break;
87 } 86 }
88 87
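
[The call above drops its trailing argument, which suggests the sh clock framework's set_rate callback shrank to two parameters in this series. A sketch of the assumed callback shape, not quoted source:

    struct clk_ops {
            int (*set_rate)(struct clk *clk, unsigned long rate);
            /* remaining ops unchanged */
    };
]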
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
index dbf3b4bb71fe..748955df018d 100644
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void)
250 250
251 return register_sh_pmu(&sh7750_pmu); 251 return register_sh_pmu(&sh7750_pmu);
252} 252}
253arch_initcall(sh7750_pmu_init); 253early_initcall(sh7750_pmu_init);
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
index 580276525731..17e6bebfede0 100644
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void)
284 284
285 return register_sh_pmu(&sh4a_pmu); 285 return register_sh_pmu(&sh4a_pmu);
286} 286}
287arch_initcall(sh4a_pmu_init); 287early_initcall(sh4a_pmu_init);
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 5a4b33435650..2ee21a47b5af 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
389 389
390 WARN_ON(_pmu->num_events > MAX_HWEVENTS); 390 WARN_ON(_pmu->num_events > MAX_HWEVENTS);
391 391
392 perf_pmu_register(&pmu); 392 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
393 perf_cpu_notifier(sh_pmu_notifier); 393 perf_cpu_notifier(sh_pmu_notifier);
394 return 0; 394 return 0;
395} 395}
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 6e8bfa1786da..4d3dbe3703e9 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -4,8 +4,6 @@
4#ifdef CONFIG_PERF_EVENTS 4#ifdef CONFIG_PERF_EVENTS
5#include <asm/ptrace.h> 5#include <asm/ptrace.h>
6 6
7extern void init_hw_perf_events(void);
8
9#define perf_arch_fetch_caller_regs(regs, ip) \ 7#define perf_arch_fetch_caller_regs(regs, ip) \
10do { \ 8do { \
11 unsigned long _pstate, _asi, _pil, _i7, _fp; \ 9 unsigned long _pstate, _asi, _pil, _i7, _fp; \
@@ -26,8 +24,6 @@ do { \
26 (regs)->u_regs[UREG_I6] = _fp; \ 24 (regs)->u_regs[UREG_I6] = _fp; \
27 (regs)->u_regs[UREG_I7] = _i7; \ 25 (regs)->u_regs[UREG_I7] = _i7; \
28} while (0) 26} while (0)
29#else
30static inline void init_hw_perf_events(void) { }
31#endif 27#endif
32 28
33#endif 29#endif
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index a4bd7ba74c89..300f810142f5 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -270,8 +270,6 @@ int __init nmi_init(void)
270 atomic_set(&nmi_active, -1); 270 atomic_set(&nmi_active, -1);
271 } 271 }
272 } 272 }
273 if (!err)
274 init_hw_perf_events();
275 273
276 return err; 274 return err;
277} 275}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0d6deb55a2ae..760578687e7c 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void)
1307 return false; 1307 return false;
1308} 1308}
1309 1309
1310void __init init_hw_perf_events(void) 1310int __init init_hw_perf_events(void)
1311{ 1311{
1312 pr_info("Performance events: "); 1312 pr_info("Performance events: ");
1313 1313
1314 if (!supported_pmu()) { 1314 if (!supported_pmu()) {
1315 pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); 1315 pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
1316 return; 1316 return 0;
1317 } 1317 }
1318 1318
1319 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); 1319 pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
1320 1320
1321 perf_pmu_register(&pmu); 1321 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1322 register_die_notifier(&perf_event_nmi_notifier); 1322 register_die_notifier(&perf_event_nmi_notifier);
1323
1324 return 0;
1323} 1325}
1326early_initcall(init_hw_perf_events);
1324 1327
1325void perf_callchain_kernel(struct perf_callchain_entry *entry, 1328void perf_callchain_kernel(struct perf_callchain_entry *entry,
1326 struct pt_regs *regs) 1329 struct pt_regs *regs)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e330da21b84f..b6fccb07123e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -377,6 +377,18 @@ config X86_ELAN
377 377
378 If unsure, choose "PC-compatible" instead. 378 If unsure, choose "PC-compatible" instead.
379 379
380config X86_INTEL_CE
381 bool "CE4100 TV platform"
382 depends on PCI
383 depends on PCI_GODIRECT
384 depends on X86_32
385 depends on X86_EXTENDED_PLATFORM
386 select X86_REBOOTFIXUPS
387 ---help---
388 Select for the Intel CE media processor (CE4100) SOC.
389 This option compiles in support for the CE4100 SOC for settop
390 boxes and media devices.
391
380config X86_MRST 392config X86_MRST
381 bool "Moorestown MID platform" 393 bool "Moorestown MID platform"
382 depends on PCI 394 depends on PCI
@@ -385,6 +397,10 @@ config X86_MRST
385 depends on X86_EXTENDED_PLATFORM 397 depends on X86_EXTENDED_PLATFORM
386 depends on X86_IO_APIC 398 depends on X86_IO_APIC
387 select APB_TIMER 399 select APB_TIMER
400 select I2C
401 select SPI
402 select INTEL_SCU_IPC
403 select X86_PLATFORM_DEVICES
388 ---help--- 404 ---help---
389 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin 405 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
390 Internet Device(MID) platform. Moorestown consists of two chips: 406 Internet Device(MID) platform. Moorestown consists of two chips:
@@ -466,6 +482,19 @@ config X86_ES7000
466 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is 482 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
467 supposed to run on an IA32-based Unisys ES7000 system. 483 supposed to run on an IA32-based Unisys ES7000 system.
468 484
485config X86_32_IRIS
486 tristate "Eurobraille/Iris poweroff module"
487 depends on X86_32
488 ---help---
489 The Iris machines from EuroBraille do not have APM or ACPI support
490 to shut themselves down properly. A special I/O sequence is
491 needed to do so, which is what this module does at
492 kernel shutdown.
493
494 This is only for Iris machines from EuroBraille.
495
496 If unused, say N.
497
469config SCHED_OMIT_FRAME_POINTER 498config SCHED_OMIT_FRAME_POINTER
470 def_bool y 499 def_bool y
471 prompt "Single-depth WCHAN output" 500 prompt "Single-depth WCHAN output"
@@ -1141,16 +1170,16 @@ config NUMA
1141comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" 1170comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
1142 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) 1171 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
1143 1172
1144config K8_NUMA 1173config AMD_NUMA
1145 def_bool y 1174 def_bool y
1146 prompt "Old style AMD Opteron NUMA detection" 1175 prompt "Old style AMD Opteron NUMA detection"
1147 depends on X86_64 && NUMA && PCI 1176 depends on X86_64 && NUMA && PCI
1148 ---help--- 1177 ---help---
1149 Enable K8 NUMA node topology detection. You should say Y here if 1178 Enable AMD NUMA node topology detection. You should say Y here if
1150 you have a multi processor AMD K8 system. This uses an old 1179 you have a multi processor AMD system. This uses an old method to
1151 method to read the NUMA configuration directly from the builtin 1180 read the NUMA configuration directly from the builtin Northbridge
1152 Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA 1181 of Opteron. It is recommended to use X86_64_ACPI_NUMA instead,
1153 instead, which also takes priority if both are compiled in. 1182 which also takes priority if both are compiled in.
1154 1183
1155config X86_64_ACPI_NUMA 1184config X86_64_ACPI_NUMA
1156 def_bool y 1185 def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index b59ee765414e..45143bbcfe5e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST
117 feature as well as for the change_page_attr() infrastructure. 117 feature as well as for the change_page_attr() infrastructure.
118 If in doubt, say "N" 118 If in doubt, say "N"
119 119
120config DEBUG_SET_MODULE_RONX
121 bool "Set loadable kernel module data as NX and text as RO"
122 depends on MODULES
123 ---help---
124 This option helps catch unintended modifications to loadable
125 kernel module's text and read-only data. It also prevents execution
126 of module data. Such protection may interfere with run-time code
127 patching and dynamic kernel tracing - and they might also protect
128 against certain classes of kernel exploits.
129 If in doubt, say "N".
130
120config DEBUG_NX_TEST 131config DEBUG_NX_TEST
121 tristate "Testcase for the NX non-executable stack feature" 132 tristate "Testcase for the NX non-executable stack feature"
122 depends on DEBUG_KERNEL && m 133 depends on DEBUG_KERNEL && m
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 52f85a196fa0..35af09d13dc1 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -182,7 +182,7 @@ no_longmode:
182 hlt 182 hlt
183 jmp 1b 183 jmp 1b
184 184
185#include "../../kernel/verify_cpu_64.S" 185#include "../../kernel/verify_cpu.S"
186 186
187 /* 187 /*
188 * Be careful here startup_64 needs to be at a predictable 188 * Be careful here startup_64 needs to be at a predictable
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 76561d20ea2f..4a2adaa9aefc 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -180,8 +180,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
180 * On the local CPU you need to be protected again NMI or MCE handlers seeing an 180 * On the local CPU you need to be protected again NMI or MCE handlers seeing an
181 * inconsistent instruction while you patch. 181 * inconsistent instruction while you patch.
182 */ 182 */
183struct text_poke_param {
184 void *addr;
185 const void *opcode;
186 size_t len;
187};
188
183extern void *text_poke(void *addr, const void *opcode, size_t len); 189extern void *text_poke(void *addr, const void *opcode, size_t len);
184extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 190extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
191extern void text_poke_smp_batch(struct text_poke_param *params, int n);
185 192
186#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 193#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
187#define IDEAL_NOP_SIZE_5 5 194#define IDEAL_NOP_SIZE_5 5
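
[A usage sketch for the new batch interface; the locking requirements come from the kernel-doc added with the implementation later in this diff, while the addresses, opcodes and lengths here are hypothetical:

    struct text_poke_param ps[2] = {
            { .addr = addr1, .opcode = opcode1, .len = len1 },
            { .addr = addr2, .opcode = opcode2, .len = len2 },
    };

    get_online_cpus();
    mutex_lock(&text_mutex);
    text_poke_smp_batch(ps, 2);     /* one stop_machine() covers both patches */
    mutex_unlock(&text_mutex);
    put_online_cpus();
]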
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index c8517f81b21e..6aee50d655d1 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -3,36 +3,53 @@
3 3
4#include <linux/pci.h> 4#include <linux/pci.h>
5 5
6extern struct pci_device_id k8_nb_ids[]; 6extern struct pci_device_id amd_nb_misc_ids[];
7struct bootnode; 7struct bootnode;
8 8
9extern int early_is_k8_nb(u32 value); 9extern int early_is_amd_nb(u32 value);
10extern int cache_k8_northbridges(void); 10extern int amd_cache_northbridges(void);
11extern void k8_flush_garts(void); 11extern void amd_flush_garts(void);
12extern int k8_get_nodes(struct bootnode *nodes); 12extern int amd_get_nodes(struct bootnode *nodes);
13extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); 13extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
14extern int k8_scan_nodes(void); 14extern int amd_scan_nodes(void);
15 15
16struct k8_northbridge_info { 16struct amd_northbridge {
17 struct pci_dev *misc;
18};
19
20struct amd_northbridge_info {
17 u16 num; 21 u16 num;
18 u8 gart_supported; 22 u64 flags;
19 struct pci_dev **nb_misc; 23 struct amd_northbridge *nb;
20}; 24};
21extern struct k8_northbridge_info k8_northbridges; 25extern struct amd_northbridge_info amd_northbridges;
26
27#define AMD_NB_GART 0x1
28#define AMD_NB_L3_INDEX_DISABLE 0x2
22 29
23#ifdef CONFIG_AMD_NB 30#ifdef CONFIG_AMD_NB
24 31
25static inline struct pci_dev *node_to_k8_nb_misc(int node) 32static inline int amd_nb_num(void)
26{ 33{
27 return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL; 34 return amd_northbridges.num;
28} 35}
29 36
30#else 37static inline int amd_nb_has_feature(int feature)
38{
39 return ((amd_northbridges.flags & feature) == feature);
40}
31 41
32static inline struct pci_dev *node_to_k8_nb_misc(int node) 42static inline struct amd_northbridge *node_to_amd_nb(int node)
33{ 43{
34 return NULL; 44 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
35} 45}
46
47#else
48
49#define amd_nb_num(x) 0
50#define amd_nb_has_feature(x) false
51#define node_to_amd_nb(x) NULL
52
36#endif 53#endif
37 54
38 55
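
[After the rename, per-node access goes through node_to_amd_nb() and capability checks through the new flags word. A minimal sketch, assuming amd_cache_northbridges() has already populated amd_northbridges (the fs_initcall in arch/x86/kernel/amd_nb.c below takes care of that):

    int i;

    if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
            for (i = 0; i < amd_nb_num(); i++) {
                    struct pci_dev *misc = node_to_amd_nb(i)->misc;
                    /* program the node's L3 registers through 'misc' ... */
            }
    }
]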
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f6ce0bda3b98..cf12007796db 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -238,6 +238,7 @@ extern void setup_boot_APIC_clock(void);
238extern void setup_secondary_APIC_clock(void); 238extern void setup_secondary_APIC_clock(void);
239extern int APIC_init_uniprocessor(void); 239extern int APIC_init_uniprocessor(void);
240extern void enable_NMI_through_LVT0(void); 240extern void enable_NMI_through_LVT0(void);
241extern int apic_force_enable(void);
241 242
242/* 243/*
243 * On 32bit this is mach-xxx local 244 * On 32bit this is mach-xxx local
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index a859ca461fb0..47a30ff8e517 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -145,6 +145,7 @@
145 145
146#ifdef CONFIG_X86_32 146#ifdef CONFIG_X86_32
147# define MAX_IO_APICS 64 147# define MAX_IO_APICS 64
148# define MAX_LOCAL_APIC 256
148#else 149#else
149# define MAX_IO_APICS 128 150# define MAX_IO_APICS 128
150# define MAX_LOCAL_APIC 32768 151# define MAX_LOCAL_APIC 32768
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 8e6218550e77..c8bfe63a06de 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -124,6 +124,7 @@ enum {
124 X86_SUBARCH_LGUEST, 124 X86_SUBARCH_LGUEST,
125 X86_SUBARCH_XEN, 125 X86_SUBARCH_XEN,
126 X86_SUBARCH_MRST, 126 X86_SUBARCH_MRST,
127 X86_SUBARCH_CE4100,
127 X86_NR_SUBARCHS, 128 X86_NR_SUBARCHS,
128}; 129};
129 130
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 9479a037419f..0141b234406f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -117,6 +117,10 @@ enum fixed_addresses {
117 FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ 117 FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
118 FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ 118 FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
119 __end_of_permanent_fixed_addresses, 119 __end_of_permanent_fixed_addresses,
120
121#ifdef CONFIG_X86_MRST
122 FIX_LNW_VRTC,
123#endif
120 /* 124 /*
121 * 256 temporary boot-time mappings, used by early_ioremap(), 125 * 256 temporary boot-time mappings, used by early_ioremap(),
122 * before ioremap() is functional. 126 * before ioremap() is functional.
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index a6b28d017c2f..0c5ca4e30d7b 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -159,7 +159,7 @@ struct io_apic_irq_attr;
159extern int io_apic_set_pci_routing(struct device *dev, int irq, 159extern int io_apic_set_pci_routing(struct device *dev, int irq,
160 struct io_apic_irq_attr *irq_attr); 160 struct io_apic_irq_attr *irq_attr);
161void setup_IO_APIC_irq_extra(u32 gsi); 161void setup_IO_APIC_irq_extra(u32 gsi);
162extern void ioapic_init_mappings(void); 162extern void ioapic_and_gsi_init(void);
163extern void ioapic_insert_resources(void); 163extern void ioapic_insert_resources(void);
164 164
165extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); 165extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
@@ -168,10 +168,9 @@ extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
168extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); 168extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
169extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); 169extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
170 170
171extern void probe_nr_irqs_gsi(void);
172extern int get_nr_irqs_gsi(void); 171extern int get_nr_irqs_gsi(void);
173
174extern void setup_ioapic_ids_from_mpc(void); 172extern void setup_ioapic_ids_from_mpc(void);
173extern void setup_ioapic_ids_from_mpc_nocheck(void);
175 174
176struct mp_ioapic_gsi{ 175struct mp_ioapic_gsi{
177 u32 gsi_base; 176 u32 gsi_base;
@@ -189,9 +188,8 @@ extern void __init pre_init_apic_IRQ0(void);
189#define io_apic_assign_pci_irqs 0 188#define io_apic_assign_pci_irqs 0
190#define setup_ioapic_ids_from_mpc x86_init_noop 189#define setup_ioapic_ids_from_mpc x86_init_noop
191static const int timer_through_8259 = 0; 190static const int timer_through_8259 = 0;
192static inline void ioapic_init_mappings(void) { } 191static inline void ioapic_and_gsi_init(void) { }
193static inline void ioapic_insert_resources(void) { } 192static inline void ioapic_insert_resources(void) { }
194static inline void probe_nr_irqs_gsi(void) { }
195#define gsi_top (NR_IRQS_LEGACY) 193#define gsi_top (NR_IRQS_LEGACY)
196static inline int mp_find_ioapic(u32 gsi) { return 0; } 194static inline int mp_find_ioapic(u32 gsi) { return 0; }
197 195
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 13b0ebaa512f..ba870bb6dd8e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq)
15 return ((irq == 2) ? 9 : irq); 15 return ((irq == 2) ? 9 : irq);
16} 16}
17 17
18#ifdef CONFIG_X86_LOCAL_APIC
19# define ARCH_HAS_NMI_WATCHDOG
20#endif
21
22#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
23extern void irq_ctx_init(int cpu); 19extern void irq_ctx_init(int cpu);
24#else 20#else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca86581b..f23eb2528464 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
28extern int __must_check __die(const char *, struct pt_regs *, long); 28extern int __must_check __die(const char *, struct pt_regs *, long);
29extern void show_registers(struct pt_regs *regs); 29extern void show_registers(struct pt_regs *regs);
30extern void show_trace(struct task_struct *t, struct pt_regs *regs, 30extern void show_trace(struct task_struct *t, struct pt_regs *regs,
31 unsigned long *sp, unsigned long bp); 31 unsigned long *sp);
32extern void __show_regs(struct pt_regs *regs, int all); 32extern void __show_regs(struct pt_regs *regs, int all);
33extern void show_regs(struct pt_regs *regs); 33extern void show_regs(struct pt_regs *regs);
34extern unsigned long oops_begin(void); 34extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index ef51b501e22a..24215072d0e1 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
48 48
49#ifdef CONFIG_MICROCODE_AMD 49#ifdef CONFIG_MICROCODE_AMD
50extern struct microcode_ops * __init init_amd_microcode(void); 50extern struct microcode_ops * __init init_amd_microcode(void);
51
52static inline void get_ucode_data(void *to, const u8 *from, size_t n)
53{
54 memcpy(to, from, n);
55}
56
51#else 57#else
52static inline struct microcode_ops * __init init_amd_microcode(void) 58static inline struct microcode_ops * __init init_amd_microcode(void)
53{ 59{
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c82868e9f905..0c90dd9f0505 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -5,8 +5,9 @@
5 5
6#include <asm/mpspec_def.h> 6#include <asm/mpspec_def.h>
7#include <asm/x86_init.h> 7#include <asm/x86_init.h>
8#include <asm/apicdef.h>
8 9
9extern int apic_version[MAX_APICS]; 10extern int apic_version[];
10extern int pic_mode; 11extern int pic_mode;
11 12
12#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
@@ -107,7 +108,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
107 int active_high_low); 108 int active_high_low);
108#endif /* CONFIG_ACPI */ 109#endif /* CONFIG_ACPI */
109 110
110#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) 111#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
111 112
112struct physid_mask { 113struct physid_mask {
113 unsigned long mask[PHYSID_ARRAY_SIZE]; 114 unsigned long mask[PHYSID_ARRAY_SIZE];
@@ -122,31 +123,31 @@ typedef struct physid_mask physid_mask_t;
122 test_and_set_bit(physid, (map).mask) 123 test_and_set_bit(physid, (map).mask)
123 124
124#define physids_and(dst, src1, src2) \ 125#define physids_and(dst, src1, src2) \
125 bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) 126 bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
126 127
127#define physids_or(dst, src1, src2) \ 128#define physids_or(dst, src1, src2) \
128 bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) 129 bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
129 130
130#define physids_clear(map) \ 131#define physids_clear(map) \
131 bitmap_zero((map).mask, MAX_APICS) 132 bitmap_zero((map).mask, MAX_LOCAL_APIC)
132 133
133#define physids_complement(dst, src) \ 134#define physids_complement(dst, src) \
134 bitmap_complement((dst).mask, (src).mask, MAX_APICS) 135 bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC)
135 136
136#define physids_empty(map) \ 137#define physids_empty(map) \
137 bitmap_empty((map).mask, MAX_APICS) 138 bitmap_empty((map).mask, MAX_LOCAL_APIC)
138 139
139#define physids_equal(map1, map2) \ 140#define physids_equal(map1, map2) \
140 bitmap_equal((map1).mask, (map2).mask, MAX_APICS) 141 bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC)
141 142
142#define physids_weight(map) \ 143#define physids_weight(map) \
143 bitmap_weight((map).mask, MAX_APICS) 144 bitmap_weight((map).mask, MAX_LOCAL_APIC)
144 145
145#define physids_shift_right(d, s, n) \ 146#define physids_shift_right(d, s, n) \
146 bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) 147 bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC)
147 148
148#define physids_shift_left(d, s, n) \ 149#define physids_shift_left(d, s, n) \
149 bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) 150 bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC)
150 151
151static inline unsigned long physids_coerce(physid_mask_t *map) 152static inline unsigned long physids_coerce(physid_mask_t *map)
152{ 153{
@@ -159,14 +160,6 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map)
159 map->mask[0] = physids; 160 map->mask[0] = physids;
160} 161}
161 162
162/* Note: will create very large stack frames if physid_mask_t is big */
163#define physid_mask_of_physid(physid) \
164 ({ \
165 physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
166 physid_set(physid, __physid_mask); \
167 __physid_mask; \
168 })
169
170static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) 163static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
171{ 164{
172 physids_clear(*map); 165 physids_clear(*map);
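
[Callers of the removed value-returning macro are expected to move to the pointer-based helper kept above, which avoids materializing a MAX_LOCAL_APIC-sized mask on the stack. A sketch of the conversion (the caller is hypothetical):

    physid_mask_t mask;

    /* was: mask = physid_mask_of_physid(apicid); */
    physid_set_mask_of_physid(apicid, &mask);
]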
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 4a7f96d7c188..c0a955a9a087 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -15,13 +15,6 @@
15 15
16#ifdef CONFIG_X86_32 16#ifdef CONFIG_X86_32
17# define MAX_MPC_ENTRY 1024 17# define MAX_MPC_ENTRY 1024
18# define MAX_APICS 256
19#else
20# if NR_CPUS <= 255
21# define MAX_APICS 255
22# else
23# define MAX_APICS 32768
24# endif
25#endif 18#endif
26 19
27/* Intel MP Floating Pointer Structure */ 20/* Intel MP Floating Pointer Structure */
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h
new file mode 100644
index 000000000000..73668abdbedf
--- /dev/null
+++ b/arch/x86/include/asm/mrst-vrtc.h
@@ -0,0 +1,9 @@
1#ifndef _MRST_VRTC_H
2#define _MRST_VRTC_H
3
4extern unsigned char vrtc_cmos_read(unsigned char reg);
5extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
6extern unsigned long vrtc_get_time(void);
7extern int vrtc_set_mmss(unsigned long nowtime);
8
9#endif
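
[The new header only declares the accessors. A sketch of a plausible caller, assuming the Moorestown vRTC mirrors the legacy CMOS register layout (RTC_SECONDS comes from linux/mc146818rtc.h):

    #include <linux/mc146818rtc.h>
    #include <asm/mrst-vrtc.h>

    unsigned char sec = vrtc_cmos_read(RTC_SECONDS);
]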
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 4a711a684b17..719f00b28ff5 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -14,7 +14,9 @@
14#include <linux/sfi.h> 14#include <linux/sfi.h>
15 15
16extern int pci_mrst_init(void); 16extern int pci_mrst_init(void);
17int __init sfi_parse_mrtc(struct sfi_table_header *table); 17extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
18extern int sfi_mrtc_num;
19extern struct sfi_rtc_table_entry sfi_mrtc_array[];
18 20
19/* 21/*
20 * Medfield is the follow-up of Moorestown, it combines two chip solution into 22 * Medfield is the follow-up of Moorestown, it combines two chip solution into
@@ -50,4 +52,14 @@ extern void mrst_early_console_init(void);
50 52
51extern struct console early_hsu_console; 53extern struct console early_hsu_console;
52extern void hsu_early_console_init(void); 54extern void hsu_early_console_init(void);
55
56extern void intel_scu_devices_create(void);
57extern void intel_scu_devices_destroy(void);
58
59/* VRTC timer */
60#define MRST_VRTC_MAP_SZ (1024)
61/*#define MRST_VRTC_PGOFFSET (0xc00) */
62
63extern void mrst_rtc_init(void);
64
53#endif /* _ASM_X86_MRST_H */ 65#endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6b89f5e86021..86030f63ba02 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -123,6 +123,10 @@
123#define MSR_AMD64_IBSCTL 0xc001103a 123#define MSR_AMD64_IBSCTL 0xc001103a
124#define MSR_AMD64_IBSBRTARGET 0xc001103b 124#define MSR_AMD64_IBSBRTARGET 0xc001103b
125 125
126/* Fam 15h MSRs */
127#define MSR_F15H_PERF_CTL 0xc0010200
128#define MSR_F15H_PERF_CTR 0xc0010201
129
126/* Fam 10h MSRs */ 130/* Fam 10h MSRs */
127#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 131#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
128#define FAM10H_MMIO_CONF_ENABLE (1<<0) 132#define FAM10H_MMIO_CONF_ENABLE (1<<0)
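
[A sketch of exercising the new family-15h counter MSRs with the standard accessors from asm/msr.h; the event encoding is hypothetical:

    u64 cfg = 0x0076ULL;    /* hypothetical event select */
    u64 count;

    wrmsrl(MSR_F15H_PERF_CTL, cfg);
    rdmsrl(MSR_F15H_PERF_CTR, count);
]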
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f86b4b7..c4021b953510 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -5,41 +5,15 @@
5#include <asm/irq.h> 5#include <asm/irq.h>
6#include <asm/io.h> 6#include <asm/io.h>
7 7
8#ifdef ARCH_HAS_NMI_WATCHDOG 8#ifdef CONFIG_X86_LOCAL_APIC
9
10/**
11 * do_nmi_callback
12 *
13 * Check to see if a callback exists and execute it. Return 1
14 * if the handler exists and was handled successfully.
15 */
16int do_nmi_callback(struct pt_regs *regs, int cpu);
17 9
18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); 10extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void);
20#if !defined(CONFIG_LOCKUP_DETECTOR)
21extern int nmi_watchdog_enabled;
22#endif
23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 11extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
24extern int reserve_perfctr_nmi(unsigned int); 12extern int reserve_perfctr_nmi(unsigned int);
25extern void release_perfctr_nmi(unsigned int); 13extern void release_perfctr_nmi(unsigned int);
26extern int reserve_evntsel_nmi(unsigned int); 14extern int reserve_evntsel_nmi(unsigned int);
27extern void release_evntsel_nmi(unsigned int); 15extern void release_evntsel_nmi(unsigned int);
28 16
29extern void setup_apic_nmi_watchdog(void *);
30extern void stop_apic_nmi_watchdog(void *);
31extern void disable_timer_nmi_watchdog(void);
32extern void enable_timer_nmi_watchdog(void);
33extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
34extern void cpu_nmi_set_wd_enabled(void);
35
36extern atomic_t nmi_active;
37extern unsigned int nmi_watchdog;
38#define NMI_NONE 0
39#define NMI_IO_APIC 1
40#define NMI_LOCAL_APIC 2
41#define NMI_INVALID 3
42
43struct ctl_table; 17struct ctl_table;
44extern int proc_nmi_enabled(struct ctl_table *, int , 18extern int proc_nmi_enabled(struct ctl_table *, int ,
45 void __user *, size_t *, loff_t *); 19 void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic;
47 21
48void arch_trigger_all_cpu_backtrace(void); 22void arch_trigger_all_cpu_backtrace(void);
49#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace 23#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
50
51static inline void localise_nmi_watchdog(void)
52{
53 if (nmi_watchdog == NMI_IO_APIC)
54 nmi_watchdog = NMI_LOCAL_APIC;
55}
56
57/* check if nmi_watchdog is active (ie was specified at boot) */
58static inline int nmi_watchdog_active(void)
59{
60 /*
61 * actually it should be:
62 * return (nmi_watchdog == NMI_LOCAL_APIC ||
63 * nmi_watchdog == NMI_IO_APIC)
64 * but since they are power of two we could use a
65 * cheaper way --cvg
66 */
67 return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
68}
69#endif 24#endif
70 25
71void lapic_watchdog_stop(void);
72int lapic_watchdog_init(unsigned nmi_hz);
73int lapic_wd_event(unsigned nmi_hz);
74unsigned lapic_adjust_nmi_hz(unsigned hz);
75void disable_lapic_nmi_watchdog(void);
76void enable_lapic_nmi_watchdog(void);
77void stop_nmi(void); 26void stop_nmi(void);
78void restart_nmi(void); 27void restart_nmi(void);
79 28
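
[What survives the old NMI-watchdog removal is the counter/event-select reservation API. A usage sketch, assuming reserve_perfctr_nmi() takes the perfctr MSR address and returns nonzero on success (the MSR value here is hypothetical):

    unsigned int msr = 0xc1;        /* e.g. an architectural PERFCTR MSR */

    if (!reserve_perfctr_nmi(msr))
            return;                 /* counter already claimed elsewhere */
    /* ... use the counter ... */
    release_perfctr_nmi(msr);
]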
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index ca0437c714b2..676129229630 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start;
65 65
66#define PCIBIOS_MIN_CARDBUS_IO 0x4000 66#define PCIBIOS_MIN_CARDBUS_IO 0x4000
67 67
68extern int pcibios_enabled;
68void pcibios_config_init(void); 69void pcibios_config_init(void);
69struct pci_bus *pcibios_scan_root(int bus); 70struct pci_bus *pcibios_scan_root(int bus);
70 71
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b1dbb3..d9d4dae305f6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@ union cpuid10_edx {
125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
126 126
127#ifdef CONFIG_PERF_EVENTS 127#ifdef CONFIG_PERF_EVENTS
128extern void init_hw_perf_events(void);
129extern void perf_events_lapic_init(void); 128extern void perf_events_lapic_init(void);
130 129
131#define PERF_EVENT_INDEX_OFFSET 0 130#define PERF_EVENT_INDEX_OFFSET 0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
156} 155}
157 156
158#else 157#else
159static inline void init_hw_perf_events(void) { }
160static inline void perf_events_lapic_init(void) { } 158static inline void perf_events_lapic_init(void) { }
161#endif 159#endif
162 160
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index a70cd216be5d..295e2ff18a6a 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS {
744}; 744};
745 745
746/* 746/*
747 * P4 PEBS specifics (Replay Event only)
748 *
749 * Format (bits):
750 * 0-6: metric from P4_PEBS_METRIC enum
751 * 7 : reserved
752 * 8 : reserved
753 * 9-11 : reserved
754 *
755 * Note we have UOP and PEBS bits reserved for now 747 * Note we have UOP and PEBS bits reserved for now
756 * just in case if we will need them once 748 * just in case if we will need them once
757 */ 749 */
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC {
788 P4_PEBS_METRIC__max 780 P4_PEBS_METRIC__max
789}; 781};
790 782
783/*
784 * Notes on internal configuration of ESCR+CCCR tuples
785 *
786 * Since P4 has quite the different architecture of
787 * performance registers in compare with "architectural"
788 * once and we have on 64 bits to keep configuration
789 * of performance event, the following trick is used.
790 *
791 * 1) Since both ESCR and CCCR registers have only low
792 * 32 bits valuable, we pack them into a single 64 bit
793 * configuration. Low 32 bits of such config correspond
794 * to low 32 bits of CCCR register and high 32 bits
795 * correspond to low 32 bits of ESCR register.
796 *
797 * 2) The meaning of every bit of such config field can
798 * be found in Intel SDM but it should be noted that
799 * we "borrow" some reserved bits for own usage and
800 * clean them or set to a proper value when we do
801 * a real write to hardware registers.
802 *
803 * 3) The format of bits of config is the following
804 * and should be either 0 or set to some predefined
805 * values:
806 *
807 * Low 32 bits
808 * -----------
809 * 0-6: P4_PEBS_METRIC enum
810 * 7-11: reserved
811 * 12: reserved (Enable)
812 * 13-15: reserved (ESCR select)
813 * 16-17: Active Thread
814 * 18: Compare
815 * 19: Complement
816 * 20-23: Threshold
817 * 24: Edge
818 * 25: reserved (FORCE_OVF)
819 * 26: reserved (OVF_PMI_T0)
820 * 27: reserved (OVF_PMI_T1)
821 * 28-29: reserved
822 * 30: reserved (Cascade)
823 * 31: reserved (OVF)
824 *
825 * High 32 bits
826 * ------------
827 * 0: reserved (T1_USR)
828 * 1: reserved (T1_OS)
829 * 2: reserved (T0_USR)
830 * 3: reserved (T0_OS)
831 * 4: Tag Enable
832 * 5-8: Tag Value
833 * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
834 * 25-30: enum P4_EVENTS
835 * 31: reserved (HT thread)
836 */
837
791#endif /* PERF_EVENT_P4_H */ 838#endif /* PERF_EVENT_P4_H */
792 839
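
[The layout documented above boils down to packing two 32-bit register images into one u64; a sketch (the helper name is hypothetical — the kernel's real packing helpers may differ):

    static inline u64 p4_config_pack(u32 escr, u32 cccr)
    {
            /* high 32 bits: ESCR image; low 32 bits: CCCR image */
            return ((u64)escr << 32) | (u64)cccr;
    }
]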
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index d6763b139a84..db8aa19a08a2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -53,6 +53,12 @@ extern void x86_mrst_early_setup(void);
53static inline void x86_mrst_early_setup(void) { } 53static inline void x86_mrst_early_setup(void) { }
54#endif 54#endif
55 55
56#ifdef CONFIG_X86_INTEL_CE
57extern void x86_ce4100_early_setup(void);
58#else
59static inline void x86_ce4100_early_setup(void) { }
60#endif
61
56#ifndef _SETUP 62#ifndef _SETUP
57 63
58/* 64/*
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def60114906..6c22bf353f26 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
48 setup_IO_APIC(); 48 setup_IO_APIC();
49 else { 49 else {
50 nr_ioapics = 0; 50 nr_ioapics = 0;
51 localise_nmi_watchdog();
52 } 51 }
53#endif 52#endif
54} 53}
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 2b16a2ad23dc..52b5c7ed3608 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -7,6 +7,7 @@
7#define _ASM_X86_STACKTRACE_H 7#define _ASM_X86_STACKTRACE_H
8 8
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/ptrace.h>
10 11
11extern int kstack_depth_to_print; 12extern int kstack_depth_to_print;
12 13
@@ -46,7 +47,7 @@ struct stacktrace_ops {
46}; 47};
47 48
48void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 49void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
49 unsigned long *stack, unsigned long bp, 50 unsigned long *stack,
50 const struct stacktrace_ops *ops, void *data); 51 const struct stacktrace_ops *ops, void *data);
51 52
52#ifdef CONFIG_X86_32 53#ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
57#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) 58#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
58#endif 59#endif
59 60
61#ifdef CONFIG_FRAME_POINTER
62static inline unsigned long
63stack_frame(struct task_struct *task, struct pt_regs *regs)
64{
65 unsigned long bp;
66
67 if (regs)
68 return regs->bp;
69
70 if (task == current) {
71 /* Grab bp right from our regs */
72 get_bp(bp);
73 return bp;
74 }
75
76 /* bp is the last reg pushed by switch_to */
77 return *(unsigned long *)task->thread.sp;
78}
79#else
80static inline unsigned long
81stack_frame(struct task_struct *task, struct pt_regs *regs)
82{
83 return 0;
84}
85#endif
86
60extern void 87extern void
61show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 88show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
62 unsigned long *stack, unsigned long bp, char *log_lvl); 89 unsigned long *stack, char *log_lvl);
63 90
64extern void 91extern void
65show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 92show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
66 unsigned long *sp, unsigned long bp, char *log_lvl); 93 unsigned long *sp, char *log_lvl);
67 94
68extern unsigned int code_bytes; 95extern unsigned int code_bytes;
69 96
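
[With bp gone from dump_trace() and the show_* helpers, the frame pointer is now derived internally via the stack_frame() helper added above. A sketch under the new signatures (ops and data are hypothetical):

    /* what the stack walker now does internally: */
    unsigned long bp = stack_frame(task, regs); /* 0 without CONFIG_FRAME_POINTER */

    /* callers simply drop the bp argument: */
    dump_trace(task, regs, stack, &ops, data);
]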
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 5469630b27f5..fa7b9176b76c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -10,12 +10,6 @@
10unsigned long long native_sched_clock(void); 10unsigned long long native_sched_clock(void);
11extern int recalibrate_cpu_khz(void); 11extern int recalibrate_cpu_khz(void);
12 12
13#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
14extern int timer_ack;
15#else
16# define timer_ack (0)
17#endif
18
19extern int no_timer_check; 13extern int no_timer_check;
20 14
21/* Accelerators for sched_clock() 15/* Accelerators for sched_clock()
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1e994754d323..34244b2cd880 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -85,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
85obj-$(CONFIG_KGDB) += kgdb.o 85obj-$(CONFIG_KGDB) += kgdb.o
86obj-$(CONFIG_VM86) += vm86_32.o 86obj-$(CONFIG_VM86) += vm86_32.o
87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
88obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
89 88
90obj-$(CONFIG_HPET_TIMER) += hpet.o 89obj-$(CONFIG_HPET_TIMER) += hpet.o
91obj-$(CONFIG_APB_TIMER) += apb_timer.o 90obj-$(CONFIG_APB_TIMER) += apb_timer.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 71232b941b6c..17c8090fabd4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
198{ 198{
199 unsigned int ver = 0; 199 unsigned int ver = 0;
200 200
201 if (id >= (MAX_LOCAL_APIC-1)) {
202 printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
203 return;
204 }
205
201 if (!enabled) { 206 if (!enabled) {
202 ++disabled_cpus; 207 ++disabled_cpus;
203 return; 208 return;
@@ -910,13 +915,13 @@ static int __init acpi_parse_madt_lapic_entries(void)
910 acpi_register_lapic_address(acpi_lapic_addr); 915 acpi_register_lapic_address(acpi_lapic_addr);
911 916
912 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, 917 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
913 acpi_parse_sapic, MAX_APICS); 918 acpi_parse_sapic, MAX_LOCAL_APIC);
914 919
915 if (!count) { 920 if (!count) {
916 x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, 921 x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
917 acpi_parse_x2apic, MAX_APICS); 922 acpi_parse_x2apic, MAX_LOCAL_APIC);
918 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, 923 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
919 acpi_parse_lapic, MAX_APICS); 924 acpi_parse_lapic, MAX_LOCAL_APIC);
920 } 925 }
921 if (!count && !x2count) { 926 if (!count && !x2count) {
922 printk(KERN_ERR PREFIX "No LAPIC entries present\n"); 927 printk(KERN_ERR PREFIX "No LAPIC entries present\n");
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24c955a..553d0b0d639b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -591,17 +591,21 @@ static atomic_t stop_machine_first;
591static int wrote_text; 591static int wrote_text;
592 592
593struct text_poke_params { 593struct text_poke_params {
594 void *addr; 594 struct text_poke_param *params;
595 const void *opcode; 595 int nparams;
596 size_t len;
597}; 596};
598 597
599static int __kprobes stop_machine_text_poke(void *data) 598static int __kprobes stop_machine_text_poke(void *data)
600{ 599{
601 struct text_poke_params *tpp = data; 600 struct text_poke_params *tpp = data;
601 struct text_poke_param *p;
602 int i;
602 603
603 if (atomic_dec_and_test(&stop_machine_first)) { 604 if (atomic_dec_and_test(&stop_machine_first)) {
604 text_poke(tpp->addr, tpp->opcode, tpp->len); 605 for (i = 0; i < tpp->nparams; i++) {
606 p = &tpp->params[i];
607 text_poke(p->addr, p->opcode, p->len);
608 }
605 smp_wmb(); /* Make sure other cpus see that this has run */ 609 smp_wmb(); /* Make sure other cpus see that this has run */
606 wrote_text = 1; 610 wrote_text = 1;
607 } else { 611 } else {
@@ -610,8 +614,12 @@ static int __kprobes stop_machine_text_poke(void *data)
610 smp_mb(); /* Load wrote_text before following execution */ 614 smp_mb(); /* Load wrote_text before following execution */
611 } 615 }
612 616
613 flush_icache_range((unsigned long)tpp->addr, 617 for (i = 0; i < tpp->nparams; i++) {
614 (unsigned long)tpp->addr + tpp->len); 618 p = &tpp->params[i];
619 flush_icache_range((unsigned long)p->addr,
620 (unsigned long)p->addr + p->len);
621 }
622
615 return 0; 623 return 0;
616} 624}
617 625
@@ -631,10 +639,13 @@ static int __kprobes stop_machine_text_poke(void *data)
631void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) 639void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
632{ 640{
633 struct text_poke_params tpp; 641 struct text_poke_params tpp;
642 struct text_poke_param p;
634 643
635 tpp.addr = addr; 644 p.addr = addr;
636 tpp.opcode = opcode; 645 p.opcode = opcode;
637 tpp.len = len; 646 p.len = len;
647 tpp.params = &p;
648 tpp.nparams = 1;
638 atomic_set(&stop_machine_first, 1); 649 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 650 wrote_text = 0;
640 /* Use __stop_machine() because the caller already got online_cpus. */ 651 /* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +653,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
642 return addr; 653 return addr;
643} 654}
644 655
656/**
657 * text_poke_smp_batch - Update instructions on a live kernel on SMP
658 * @params: an array of text_poke parameters
659 * @n: the number of elements in params.
660 *
661 * Modify multi-byte instruction by using stop_machine() on SMP. Since the
662 * stop_machine() is heavy task, it is better to aggregate text_poke requests
663 * and do it once if possible.
664 *
665 * Note: Must be called under get_online_cpus() and text_mutex.
666 */
667void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
668{
669 struct text_poke_params tpp = {.params = params, .nparams = n};
670
671 atomic_set(&stop_machine_first, 1);
672 wrote_text = 0;
673 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
674}
675
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 676#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646 677
647#ifdef CONFIG_X86_64 678#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 8f6463d8ed0d..affacb5e0065 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,95 +12,116 @@
12 12
13static u32 *flush_words; 13static u32 *flush_words;
14 14
15struct pci_device_id k8_nb_ids[] = { 15struct pci_device_id amd_nb_misc_ids[] = {
16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, 18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
19 {} 19 {}
20}; 20};
21EXPORT_SYMBOL(k8_nb_ids); 21EXPORT_SYMBOL(amd_nb_misc_ids);
22 22
23struct k8_northbridge_info k8_northbridges; 23struct amd_northbridge_info amd_northbridges;
24EXPORT_SYMBOL(k8_northbridges); 24EXPORT_SYMBOL(amd_northbridges);
25 25
26static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) 26static struct pci_dev *next_northbridge(struct pci_dev *dev,
27 struct pci_device_id *ids)
27{ 28{
28 do { 29 do {
29 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); 30 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
30 if (!dev) 31 if (!dev)
31 break; 32 break;
32 } while (!pci_match_id(&k8_nb_ids[0], dev)); 33 } while (!pci_match_id(ids, dev));
33 return dev; 34 return dev;
34} 35}
35 36
36int cache_k8_northbridges(void) 37int amd_cache_northbridges(void)
37{ 38{
38 int i; 39 int i = 0;
39 struct pci_dev *dev; 40 struct amd_northbridge *nb;
41 struct pci_dev *misc;
40 42
41 if (k8_northbridges.num) 43 if (amd_nb_num())
42 return 0; 44 return 0;
43 45
44 dev = NULL; 46 misc = NULL;
45 while ((dev = next_k8_northbridge(dev)) != NULL) 47 while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
46 k8_northbridges.num++; 48 i++;
47 49
48 /* some CPU families (e.g. family 0x11) do not support GART */ 50 if (i == 0)
49 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || 51 return 0;
50 boot_cpu_data.x86 == 0x15)
51 k8_northbridges.gart_supported = 1;
52 52
53 k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * 53 nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
54 sizeof(void *), GFP_KERNEL); 54 if (!nb)
55 if (!k8_northbridges.nb_misc)
56 return -ENOMEM; 55 return -ENOMEM;
57 56
58 if (!k8_northbridges.num) { 57 amd_northbridges.nb = nb;
59 k8_northbridges.nb_misc[0] = NULL; 58 amd_northbridges.num = i;
60 return 0;
61 }
62 59
63 if (k8_northbridges.gart_supported) { 60 misc = NULL;
64 flush_words = kmalloc(k8_northbridges.num * sizeof(u32), 61 for (i = 0; i != amd_nb_num(); i++) {
65 GFP_KERNEL); 62 node_to_amd_nb(i)->misc = misc =
66 if (!flush_words) { 63 next_northbridge(misc, amd_nb_misc_ids);
67 kfree(k8_northbridges.nb_misc); 64 }
68 return -ENOMEM; 65
69 } 66 /* some CPU families (e.g. family 0x11) do not support GART */
70 } 67 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
68 boot_cpu_data.x86 == 0x15)
69 amd_northbridges.flags |= AMD_NB_GART;
70
71 /*
72 * Some CPU families support L3 Cache Index Disable. There are some
73 * limitations because of E382 and E388 on family 0x10.
74 */
75 if (boot_cpu_data.x86 == 0x10 &&
76 boot_cpu_data.x86_model >= 0x8 &&
77 (boot_cpu_data.x86_model > 0x9 ||
78 boot_cpu_data.x86_mask >= 0x1))
79 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
71 80
72 dev = NULL;
73 i = 0;
74 while ((dev = next_k8_northbridge(dev)) != NULL) {
75 k8_northbridges.nb_misc[i] = dev;
76 if (k8_northbridges.gart_supported)
77 pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
78 }
79 k8_northbridges.nb_misc[i] = NULL;
80 return 0; 81 return 0;
81} 82}
82EXPORT_SYMBOL_GPL(cache_k8_northbridges); 83EXPORT_SYMBOL_GPL(amd_cache_northbridges);
83 84
84/* Ignores subdevice/subvendor but as far as I can figure out 85/* Ignores subdevice/subvendor but as far as I can figure out
85 they're useless anyways */ 86 they're useless anyways */
86int __init early_is_k8_nb(u32 device) 87int __init early_is_amd_nb(u32 device)
87{ 88{
88 struct pci_device_id *id; 89 struct pci_device_id *id;
89 u32 vendor = device & 0xffff; 90 u32 vendor = device & 0xffff;
90 device >>= 16; 91 device >>= 16;
91 for (id = k8_nb_ids; id->vendor; id++) 92 for (id = amd_nb_misc_ids; id->vendor; id++)
92 if (vendor == id->vendor && device == id->device) 93 if (vendor == id->vendor && device == id->device)
93 return 1; 94 return 1;
94 return 0; 95 return 0;
95} 96}
96 97
97void k8_flush_garts(void) 98int amd_cache_gart(void)
99{
100 int i;
101
102 if (!amd_nb_has_feature(AMD_NB_GART))
103 return 0;
104
105 flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
106 if (!flush_words) {
107 amd_northbridges.flags &= ~AMD_NB_GART;
108 return -ENOMEM;
109 }
110
111 for (i = 0; i != amd_nb_num(); i++)
112 pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
113 &flush_words[i]);
114
115 return 0;
116}
117
118void amd_flush_garts(void)
98{ 119{
99 int flushed, i; 120 int flushed, i;
100 unsigned long flags; 121 unsigned long flags;
101 static DEFINE_SPINLOCK(gart_lock); 122 static DEFINE_SPINLOCK(gart_lock);
102 123
103 if (!k8_northbridges.gart_supported) 124 if (!amd_nb_has_feature(AMD_NB_GART))
104 return; 125 return;
105 126
106 /* Avoid races between AGP and IOMMU. In theory it's not needed 127 /* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +130,16 @@ void k8_flush_garts(void)
109 that it doesn't matter to serialize more. -AK */ 130 that it doesn't matter to serialize more. -AK */
110 spin_lock_irqsave(&gart_lock, flags); 131 spin_lock_irqsave(&gart_lock, flags);
111 flushed = 0; 132 flushed = 0;
112 for (i = 0; i < k8_northbridges.num; i++) { 133 for (i = 0; i < amd_nb_num(); i++) {
113 pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, 134 pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
114 flush_words[i]|1); 135 flush_words[i] | 1);
115 flushed++; 136 flushed++;
116 } 137 }
117 for (i = 0; i < k8_northbridges.num; i++) { 138 for (i = 0; i < amd_nb_num(); i++) {
118 u32 w; 139 u32 w;
119 /* Make sure the hardware actually executed the flush*/ 140 /* Make sure the hardware actually executed the flush*/
120 for (;;) { 141 for (;;) {
121 pci_read_config_dword(k8_northbridges.nb_misc[i], 142 pci_read_config_dword(node_to_amd_nb(i)->misc,
122 0x9c, &w); 143 0x9c, &w);
123 if (!(w & 1)) 144 if (!(w & 1))
124 break; 145 break;
@@ -129,19 +150,23 @@ void k8_flush_garts(void)
129 if (!flushed) 150 if (!flushed)
130 printk("nothing to flush?\n"); 151 printk("nothing to flush?\n");
131} 152}
132EXPORT_SYMBOL_GPL(k8_flush_garts); 153EXPORT_SYMBOL_GPL(amd_flush_garts);
133 154
134static __init int init_k8_nbs(void) 155static __init int init_amd_nbs(void)
135{ 156{
136 int err = 0; 157 int err = 0;
137 158
138 err = cache_k8_northbridges(); 159 err = amd_cache_northbridges();
139 160
140 if (err < 0) 161 if (err < 0)
141 printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); 162 printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
163
164 if (amd_cache_gart() < 0)
165 printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
166 "GART support disabled.\n");
142 167
143 return err; 168 return err;
144} 169}
145 170
146/* This has to go after the PCI subsystem */ 171/* This has to go after the PCI subsystem */
147fs_initcall(init_k8_nbs); 172fs_initcall(init_amd_nbs);
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 92543c73cf8e..7c9ab59653e8 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)
315 315
316 if (system_state == SYSTEM_BOOTING) { 316 if (system_state == SYSTEM_BOOTING) {
317 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); 317 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
318 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
318 /* APB timer irqs are set up as mp_irqs, timer is edge type */ 319 /* APB timer irqs are set up as mp_irqs, timer is edge type */
319 __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); 320 __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
320 if (request_irq(adev->irq, apbt_interrupt_handler, 321 if (request_irq(adev->irq, apbt_interrupt_handler,
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index b3a16e8f0703..dcd7c83e1659 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
206 * Do a PCI bus scan by hand because we're running before the PCI 206 * Do a PCI bus scan by hand because we're running before the PCI
207 * subsystem. 207 * subsystem.
208 * 208 *
209 * All K8 AGP bridges are AGPv3 compliant, so we can do this scan 209 * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
210 * generically. It's probably overkill to always scan all slots because 210 * generically. It's probably overkill to always scan all slots because
211 * the AGP bridges should always be on their own bus in the HT hierarchy, 211 * the AGP bridges should always be on their own bus in the HT hierarchy,
212 * but do it here for future safety. 212 * but do it here for future safety.
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
303 dev_limit = bus_dev_ranges[i].dev_limit; 303 dev_limit = bus_dev_ranges[i].dev_limit;
304 304
305 for (slot = dev_base; slot < dev_limit; slot++) { 305 for (slot = dev_base; slot < dev_limit; slot++) {
306 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 306 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
307 continue; 307 continue;
308 308
309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
358 dev_limit = bus_dev_ranges[i].dev_limit; 358 dev_limit = bus_dev_ranges[i].dev_limit;
359 359
360 for (slot = dev_base; slot < dev_limit; slot++) { 360 for (slot = dev_base; slot < dev_limit; slot++) {
361 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 361 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
362 continue; 362 continue;
363 363
364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
400 dev_limit = bus_dev_ranges[i].dev_limit; 400 dev_limit = bus_dev_ranges[i].dev_limit;
401 401
402 for (slot = dev_base; slot < dev_limit; slot++) { 402 for (slot = dev_base; slot < dev_limit; slot++) {
403 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 403 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
404 continue; 404 continue;
405 405
406 iommu_detected = 1; 406 iommu_detected = 1;
@@ -518,7 +518,7 @@ out:
518 dev_base = bus_dev_ranges[i].dev_base; 518 dev_base = bus_dev_ranges[i].dev_base;
519 dev_limit = bus_dev_ranges[i].dev_limit; 519 dev_limit = bus_dev_ranges[i].dev_limit;
520 for (slot = dev_base; slot < dev_limit; slot++) { 520 for (slot = dev_base; slot < dev_limit; slot++) {
521 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 521 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
522 continue; 522 continue;
523 523
524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); 524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
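Note: early_is_amd_nb() runs before the PCI core is up, so it works on the raw 32-bit vendor/device word that read_pci_config(bus, slot, 3, 0x00) returns. A hedged sketch of the shape of that check; the device IDs below are illustrative, the real code matches against its own table of GART-capable NB function-3 IDs:

static bool early_is_amd_nb_sketch(u32 device_vendor)
{
	u16 vendor = device_vendor & 0xffff;
	u16 device = device_vendor >> 16;

	if (vendor != PCI_VENDOR_ID_AMD)
		return false;

	switch (device) {
	case 0x1103:	/* K8 NB function 3 (illustrative) */
	case 0x1203:	/* Fam10h NB function 3 (illustrative) */
		return true;
	}
	return false;
}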
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) 6obj-y += hw_nmi.o
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif
9obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
10 7
11obj-$(CONFIG_X86_IO_APIC) += io_apic.o 8obj-$(CONFIG_X86_IO_APIC) += io_apic.o
12obj-$(CONFIG_SMP) += ipi.o 9obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 78218135b48e..879999a5230f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/cpu.h> 32#include <linux/cpu.h>
33#include <linux/dmi.h> 33#include <linux/dmi.h>
34#include <linux/nmi.h>
35#include <linux/smp.h> 34#include <linux/smp.h>
36#include <linux/mm.h> 35#include <linux/mm.h>
37 36
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
432 reserved = reserve_eilvt_offset(offset, new); 431 reserved = reserve_eilvt_offset(offset, new);
433 432
434 if (reserved != new) { 433 if (reserved != new) {
435 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " 434 pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
436 "vector 0x%x was already reserved by another core, " 435 "vector 0x%x, but the register is already in use for "
437 "APIC%lX=0x%x\n", 436 "vector 0x%x on another cpu\n",
438 smp_processor_id(), new, reserved, reg, old); 437 smp_processor_id(), reg, offset, new, reserved);
439 return -EINVAL; 438 return -EINVAL;
440 } 439 }
441 440
442 if (!eilvt_entry_is_changeable(old, new)) { 441 if (!eilvt_entry_is_changeable(old, new)) {
443 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " 442 pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
444 "register already in use, APIC%lX=0x%x\n", 443 "vector 0x%x, but the register is already in use for "
445 smp_processor_id(), new, reg, old); 444 "vector 0x%x on this cpu\n",
445 smp_processor_id(), reg, offset, new, old);
446 return -EBUSY; 446 return -EBUSY;
447 } 447 }
448 448
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void)
799 * PIT/HPET going. Otherwise register lapic as a dummy 799 * PIT/HPET going. Otherwise register lapic as a dummy
800 * device. 800 * device.
801 */ 801 */
802 if (nmi_watchdog != NMI_IO_APIC) 802 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
803 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
804 else
805 pr_warning("APIC timer registered as dummy,"
806 " due to nmi_watchdog=%d!\n", nmi_watchdog);
807 803
808 /* Setup the lapic or request the broadcast */ 804 /* Setup the lapic or request the broadcast */
809 setup_APIC_timer(); 805 setup_APIC_timer();
@@ -1387,7 +1383,6 @@ void __cpuinit end_local_APIC_setup(void)
1387 } 1383 }
1388#endif 1384#endif
1389 1385
1390 setup_apic_nmi_watchdog(NULL);
1391 apic_pm_activate(); 1386 apic_pm_activate();
1392 1387
1393 /* 1388 /*
@@ -1538,13 +1533,60 @@ static int __init detect_init_APIC(void)
1538 return 0; 1533 return 0;
1539} 1534}
1540#else 1535#else
1536
1537static int apic_verify(void)
1538{
1539 u32 features, h, l;
1540
1541 /*
1542 * The APIC feature bit should now be enabled
1543 * in `cpuid'
1544 */
1545 features = cpuid_edx(1);
1546 if (!(features & (1 << X86_FEATURE_APIC))) {
1547 pr_warning("Could not enable APIC!\n");
1548 return -1;
1549 }
1550 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1551 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1552
1553 /* The BIOS may have set up the APIC at some other address */
1554 rdmsr(MSR_IA32_APICBASE, l, h);
1555 if (l & MSR_IA32_APICBASE_ENABLE)
1556 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1557
1558 pr_info("Found and enabled local APIC!\n");
1559 return 0;
1560}
1561
1562int apic_force_enable(void)
1563{
1564 u32 h, l;
1565
1566 if (disable_apic)
1567 return -1;
1568
1569 /*
1570 * Some BIOSes disable the local APIC in the APIC_BASE
1571 * MSR. This can only be done in software for Intel P6 or later
1572 * and AMD K7 (Model > 1) or later.
1573 */
1574 rdmsr(MSR_IA32_APICBASE, l, h);
1575 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1576 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1577 l &= ~MSR_IA32_APICBASE_BASE;
1578 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1579 wrmsr(MSR_IA32_APICBASE, l, h);
1580 enabled_via_apicbase = 1;
1581 }
1582 return apic_verify();
1583}
1584
1541/* 1585/*
1542 * Detect and initialize APIC 1586 * Detect and initialize APIC
1543 */ 1587 */
1544static int __init detect_init_APIC(void) 1588static int __init detect_init_APIC(void)
1545{ 1589{
1546 u32 h, l, features;
1547
1548 /* Disabled by kernel option? */ 1590 /* Disabled by kernel option? */
1549 if (disable_apic) 1591 if (disable_apic)
1550 return -1; 1592 return -1;
@@ -1574,38 +1616,12 @@ static int __init detect_init_APIC(void)
1574 "you can enable it with \"lapic\"\n"); 1616 "you can enable it with \"lapic\"\n");
1575 return -1; 1617 return -1;
1576 } 1618 }
1577 /* 1619 if (apic_force_enable())
1578 * Some BIOSes disable the local APIC in the APIC_BASE 1620 return -1;
1579 * MSR. This can only be done in software for Intel P6 or later 1621 } else {
1580 * and AMD K7 (Model > 1) or later. 1622 if (apic_verify())
1581 */ 1623 return -1;
1582 rdmsr(MSR_IA32_APICBASE, l, h);
1583 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1584 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1585 l &= ~MSR_IA32_APICBASE_BASE;
1586 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1587 wrmsr(MSR_IA32_APICBASE, l, h);
1588 enabled_via_apicbase = 1;
1589 }
1590 }
1591 /*
1592 * The APIC feature bit should now be enabled
1593 * in `cpuid'
1594 */
1595 features = cpuid_edx(1);
1596 if (!(features & (1 << X86_FEATURE_APIC))) {
1597 pr_warning("Could not enable APIC!\n");
1598 return -1;
1599 } 1624 }
1600 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1601 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1602
1603 /* The BIOS may have set up the APIC at some other address */
1604 rdmsr(MSR_IA32_APICBASE, l, h);
1605 if (l & MSR_IA32_APICBASE_ENABLE)
1606 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1607
1608 pr_info("Found and enabled local APIC!\n");
1609 1625
1610 apic_pm_activate(); 1626 apic_pm_activate();
1611 1627
@@ -1693,7 +1709,7 @@ void __init init_apic_mappings(void)
1693 * This initializes the IO-APIC and APIC hardware if this is 1709 * This initializes the IO-APIC and APIC hardware if this is
1694 * a UP kernel. 1710 * a UP kernel.
1695 */ 1711 */
1696int apic_version[MAX_APICS]; 1712int apic_version[MAX_LOCAL_APIC];
1697 1713
1698int __init APIC_init_uniprocessor(void) 1714int __init APIC_init_uniprocessor(void)
1699{ 1715{
@@ -1758,17 +1774,10 @@ int __init APIC_init_uniprocessor(void)
1758 setup_IO_APIC(); 1774 setup_IO_APIC();
1759 else { 1775 else {
1760 nr_ioapics = 0; 1776 nr_ioapics = 0;
1761 localise_nmi_watchdog();
1762 } 1777 }
1763#else
1764 localise_nmi_watchdog();
1765#endif 1778#endif
1766 1779
1767 x86_init.timers.setup_percpu_clockev(); 1780 x86_init.timers.setup_percpu_clockev();
1768#ifdef CONFIG_X86_64
1769 check_nmi_watchdog();
1770#endif
1771
1772 return 0; 1781 return 0;
1773} 1782}
1774 1783
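Note: with apic_verify() and apic_force_enable() factored out, the 32-bit detect_init_APIC() reduces to the flow below. This is a condensed, illustrative restatement of the hunks above with the printk messages elided, not additional code from the patch:

static int __init detect_init_APIC(void)
{
	if (disable_apic)
		return -1;

	if (!cpu_has_apic) {
		if (!force_enable_local_apic)
			return -1;		/* no APIC and not forced on */
		if (apic_force_enable())	/* reenable via APIC_BASE MSR */
			return -1;
	} else {
		if (apic_verify())		/* confirm the cpuid bit, read the base */
			return -1;
	}

	apic_pm_activate();
	return 0;
}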
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e55b90..72ec29e1ae06 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,20 +17,31 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20#ifdef CONFIG_HARDLOCKUP_DETECTOR
20u64 hw_nmi_get_sample_period(void) 21u64 hw_nmi_get_sample_period(void)
21{ 22{
22 return (u64)(cpu_khz) * 1000 * 60; 23 return (u64)(cpu_khz) * 1000 * 60;
23} 24}
25#endif
24 26
25#ifdef ARCH_HAS_NMI_WATCHDOG 27#ifdef arch_trigger_all_cpu_backtrace
26
27/* For reliability, we're prepared to waste bits here. */ 28/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 29static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29 30
31/* "in progress" flag of arch_trigger_all_cpu_backtrace */
32static unsigned long backtrace_flag;
33
30void arch_trigger_all_cpu_backtrace(void) 34void arch_trigger_all_cpu_backtrace(void)
31{ 35{
32 int i; 36 int i;
33 37
38 if (test_and_set_bit(0, &backtrace_flag))
39 /*
40 * If there is already a trigger_all_cpu_backtrace() in progress
41 * (backtrace_flag == 1), don't output duplicate cpu dump info.
42 */
43 return;
44
34 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 45 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
35 46
36 printk(KERN_INFO "sending NMI to all CPUs:\n"); 47 printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -42,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void)
42 break; 53 break;
43 mdelay(1); 54 mdelay(1);
44 } 55 }
56
57 clear_bit(0, &backtrace_flag);
58 smp_mb__after_clear_bit();
45} 59}
46 60
47static int __kprobes 61static int __kprobes
@@ -50,7 +64,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
50{ 64{
51 struct die_args *args = __args; 65 struct die_args *args = __args;
52 struct pt_regs *regs; 66 struct pt_regs *regs;
53 int cpu = smp_processor_id(); 67 int cpu;
54 68
55 switch (cmd) { 69 switch (cmd) {
56 case DIE_NMI: 70 case DIE_NMI:
@@ -62,6 +76,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
62 } 76 }
63 77
64 regs = args->regs; 78 regs = args->regs;
79 cpu = smp_processor_id();
65 80
66 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 81 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
67 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; 82 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -91,18 +106,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
91} 106}
92early_initcall(register_trigger_all_cpu_backtrace); 107early_initcall(register_trigger_all_cpu_backtrace);
93#endif 108#endif
94
95/* STUB calls to mimic old nmi_watchdog behaviour */
96#if defined(CONFIG_X86_LOCAL_APIC)
97unsigned int nmi_watchdog = NMI_NONE;
98EXPORT_SYMBOL(nmi_watchdog);
99void acpi_nmi_enable(void) { return; }
100void acpi_nmi_disable(void) { return; }
101#endif
102atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
103EXPORT_SYMBOL(nmi_active);
104int unknown_nmi_panic;
105void cpu_nmi_set_wd_enabled(void) { return; }
106void stop_apic_nmi_watchdog(void *unused) { return; }
107void setup_apic_nmi_watchdog(void *unused) { return; }
108int __init check_nmi_watchdog(void) { return 0; }
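Note: the backtrace_flag handling added above is a general serialize-one-shot pattern. The same idea in isolation, with illustrative names:

static unsigned long op_in_progress;

void one_shot_op(void)
{
	/* only the first caller proceeds; concurrent callers bail out */
	if (test_and_set_bit(0, &op_in_progress))
		return;

	/* ... do the work, at most one instance at a time ... */

	clear_bit(0, &op_in_progress);
	smp_mb__after_clear_bit();	/* order the release against later loads */
}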
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fadcd743a74f..f6cd5b410770 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
54#include <asm/dma.h> 54#include <asm/dma.h>
55#include <asm/timer.h> 55#include <asm/timer.h>
56#include <asm/i8259.h> 56#include <asm/i8259.h>
57#include <asm/nmi.h>
58#include <asm/msidef.h> 57#include <asm/msidef.h>
59#include <asm/hypertransport.h> 58#include <asm/hypertransport.h>
60#include <asm/setup.h> 59#include <asm/setup.h>
@@ -1934,8 +1933,7 @@ void disable_IO_APIC(void)
1934 * 1933 *
1935 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 1934 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1936 */ 1935 */
1937 1936void __init setup_ioapic_ids_from_mpc_nocheck(void)
1938void __init setup_ioapic_ids_from_mpc(void)
1939{ 1937{
1940 union IO_APIC_reg_00 reg_00; 1938 union IO_APIC_reg_00 reg_00;
1941 physid_mask_t phys_id_present_map; 1939 physid_mask_t phys_id_present_map;
@@ -1944,15 +1942,6 @@ void __init setup_ioapic_ids_from_mpc(void)
1944 unsigned char old_id; 1942 unsigned char old_id;
1945 unsigned long flags; 1943 unsigned long flags;
1946 1944
1947 if (acpi_ioapic)
1948 return;
1949 /*
1950 * Don't check I/O APIC IDs for xAPIC systems. They have
1951 * no meaning without the serial APIC bus.
1952 */
1953 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1954 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1955 return;
1956 /* 1945 /*
1957 * This is broken; anything with a real cpu count has to 1946 * This is broken; anything with a real cpu count has to
1958 * circumvent this idiocy regardless. 1947 * circumvent this idiocy regardless.
@@ -2006,7 +1995,6 @@ void __init setup_ioapic_ids_from_mpc(void)
2006 physids_or(phys_id_present_map, phys_id_present_map, tmp); 1995 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2007 } 1996 }
2008 1997
2009
2010 /* 1998 /*
2011 * We need to adjust the IRQ routing table 1999 * We need to adjust the IRQ routing table
2012 * if the ID changed. 2000 * if the ID changed.
@@ -2042,6 +2030,21 @@ void __init setup_ioapic_ids_from_mpc(void)
2042 apic_printk(APIC_VERBOSE, " ok.\n"); 2030 apic_printk(APIC_VERBOSE, " ok.\n");
2043 } 2031 }
2044} 2032}
2033
2034void __init setup_ioapic_ids_from_mpc(void)
2035{
2036
2037 if (acpi_ioapic)
2038 return;
2039 /*
2040 * Don't check I/O APIC IDs for xAPIC systems. They have
2041 * no meaning without the serial APIC bus.
2042 */
2043 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2044 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
2045 return;
2046 setup_ioapic_ids_from_mpc_nocheck();
2047}
2045#endif 2048#endif
2046 2049
2047int no_timer_check __initdata; 2050int no_timer_check __initdata;
@@ -2642,24 +2645,6 @@ static void lapic_register_intr(int irq)
2642 "edge"); 2645 "edge");
2643} 2646}
2644 2647
2645static void __init setup_nmi(void)
2646{
2647 /*
2648 * Dirty trick to enable the NMI watchdog ...
2649 * We put the 8259A master into AEOI mode and
2650 * unmask on all local APICs LVT0 as NMI.
2651 *
2652 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2653 * is from Maciej W. Rozycki - so we do not have to EOI from
2654 * the NMI handler or the timer interrupt.
2655 */
2656 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2657
2658 enable_NMI_through_LVT0();
2659
2660 apic_printk(APIC_VERBOSE, " done.\n");
2661}
2662
2663/* 2648/*
2664 * This looks a bit hackish but it's about the only one way of sending 2649 * This looks a bit hackish but it's about the only one way of sending
2665 * a few INTA cycles to 8259As and any associated glue logic. ICR does 2650 * a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2765,15 +2750,6 @@ static inline void __init check_timer(void)
2765 */ 2750 */
2766 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2751 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2767 legacy_pic->init(1); 2752 legacy_pic->init(1);
2768#ifdef CONFIG_X86_32
2769 {
2770 unsigned int ver;
2771
2772 ver = apic_read(APIC_LVR);
2773 ver = GET_APIC_VERSION(ver);
2774 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2775 }
2776#endif
2777 2753
2778 pin1 = find_isa_irq_pin(0, mp_INT); 2754 pin1 = find_isa_irq_pin(0, mp_INT);
2779 apic1 = find_isa_irq_apic(0, mp_INT); 2755 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2797,6 @@ static inline void __init check_timer(void)
2821 unmask_ioapic(cfg); 2797 unmask_ioapic(cfg);
2822 } 2798 }
2823 if (timer_irq_works()) { 2799 if (timer_irq_works()) {
2824 if (nmi_watchdog == NMI_IO_APIC) {
2825 setup_nmi();
2826 legacy_pic->unmask(0);
2827 }
2828 if (disable_timer_pin_1 > 0) 2800 if (disable_timer_pin_1 > 0)
2829 clear_IO_APIC_pin(0, pin1); 2801 clear_IO_APIC_pin(0, pin1);
2830 goto out; 2802 goto out;
@@ -2850,11 +2822,6 @@ static inline void __init check_timer(void)
2850 if (timer_irq_works()) { 2822 if (timer_irq_works()) {
2851 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2823 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2852 timer_through_8259 = 1; 2824 timer_through_8259 = 1;
2853 if (nmi_watchdog == NMI_IO_APIC) {
2854 legacy_pic->mask(0);
2855 setup_nmi();
2856 legacy_pic->unmask(0);
2857 }
2858 goto out; 2825 goto out;
2859 } 2826 }
2860 /* 2827 /*
@@ -2866,15 +2833,6 @@ static inline void __init check_timer(void)
2866 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2833 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2867 } 2834 }
2868 2835
2869 if (nmi_watchdog == NMI_IO_APIC) {
2870 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2871 "through the IO-APIC - disabling NMI Watchdog!\n");
2872 nmi_watchdog = NMI_NONE;
2873 }
2874#ifdef CONFIG_X86_32
2875 timer_ack = 0;
2876#endif
2877
2878 apic_printk(APIC_QUIET, KERN_INFO 2836 apic_printk(APIC_QUIET, KERN_INFO
2879 "...trying to set up timer as Virtual Wire IRQ...\n"); 2837 "...trying to set up timer as Virtual Wire IRQ...\n");
2880 2838
@@ -3639,7 +3597,7 @@ int __init io_apic_get_redir_entries (int ioapic)
3639 return reg_01.bits.entries + 1; 3597 return reg_01.bits.entries + 1;
3640} 3598}
3641 3599
3642void __init probe_nr_irqs_gsi(void) 3600static void __init probe_nr_irqs_gsi(void)
3643{ 3601{
3644 int nr; 3602 int nr;
3645 3603
@@ -3956,7 +3914,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3956 return res; 3914 return res;
3957} 3915}
3958 3916
3959void __init ioapic_init_mappings(void) 3917void __init ioapic_and_gsi_init(void)
3960{ 3918{
3961 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 3919 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3962 struct resource *ioapic_res; 3920 struct resource *ioapic_res;
@@ -3994,6 +3952,8 @@ fake_ioapic_page:
3994 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; 3952 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
3995 ioapic_res++; 3953 ioapic_res++;
3996 } 3954 }
3955
3956 probe_nr_irqs_gsi();
3997} 3957}
3998 3958
3999void __init ioapic_insert_resources(void) 3959void __init ioapic_insert_resources(void)
@@ -4103,7 +4063,8 @@ void __init pre_init_apic_IRQ0(void)
4103 4063
4104 printk(KERN_INFO "Early APIC setup for system timer0\n"); 4064 printk(KERN_INFO "Early APIC setup for system timer0\n");
4105#ifndef CONFIG_SMP 4065#ifndef CONFIG_SMP
4106 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 4066 physid_set_mask_of_physid(boot_cpu_physical_apicid,
4067 &phys_cpu_present_map);
4107#endif 4068#endif
4108 /* Make sure the irq descriptor is set up */ 4069 /* Make sure the irq descriptor is set up */
4109 cfg = alloc_irq_and_cfg_at(0, 0); 4070 cfg = alloc_irq_and_cfg_at(0, 0);
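Note: splitting setup_ioapic_ids_from_mpc() into a checked wrapper plus a _nocheck worker lets platform code force the IO-APIC ID sync even when the wrapper's acpi_ioapic/xAPIC guards would skip it. A hypothetical caller:

/* hypothetical platform fixup that needs the ID sync unconditionally */
static void __init example_platform_fixup(void)
{
	setup_ioapic_ids_from_mpc_nocheck();
}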
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
1/*
2 * NMI watchdog support on APIC systems
3 *
4 * Started by Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
10 * Pavel Machek and
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */
13
14#include <asm/apic.h>
15
16#include <linux/nmi.h>
17#include <linux/mm.h>
18#include <linux/delay.h>
19#include <linux/interrupt.h>
20#include <linux/module.h>
21#include <linux/slab.h>
22#include <linux/sysdev.h>
23#include <linux/sysctl.h>
24#include <linux/percpu.h>
25#include <linux/kprobes.h>
26#include <linux/cpumask.h>
27#include <linux/kernel_stat.h>
28#include <linux/kdebug.h>
29#include <linux/smp.h>
30
31#include <asm/i8259.h>
32#include <asm/io_apic.h>
33#include <asm/proto.h>
34#include <asm/timer.h>
35
36#include <asm/mce.h>
37
38#include <asm/mach_traps.h>
39
40int unknown_nmi_panic;
41int nmi_watchdog_enabled;
42
43/* For reliability, we're prepared to waste bits here. */
44static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
45
46/* nmi_active:
47 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot
49 * be enabled
50 * 0: the lapic NMI watchdog is disabled, but can be enabled
51 */
52atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53EXPORT_SYMBOL(nmi_active);
54
55unsigned int nmi_watchdog = NMI_NONE;
56EXPORT_SYMBOL(nmi_watchdog);
57
58static int panic_on_timeout;
59
60static unsigned int nmi_hz = HZ;
61static DEFINE_PER_CPU(short, wd_enabled);
62static int endflag __initdata;
63
64static inline unsigned int get_nmi_count(int cpu)
65{
66 return per_cpu(irq_stat, cpu).__nmi_count;
67}
68
69static inline int mce_in_progress(void)
70{
71#if defined(CONFIG_X86_MCE)
72 return atomic_read(&mce_entry) > 0;
73#endif
74 return 0;
75}
76
77/*
78 * Take the local apic timer and PIT/HPET into account. We don't
79 * know which one is active when we have highres/dyntick on
80 */
81static inline unsigned int get_timer_irqs(int cpu)
82{
83 return per_cpu(irq_stat, cpu).apic_timer_irqs +
84 per_cpu(irq_stat, cpu).irq0_irqs;
85}
86
87#ifdef CONFIG_SMP
88/*
89 * The performance counters used by NMI_LOCAL_APIC don't trigger when
90 * the CPU is idle. To make sure the NMI watchdog really ticks on all
91 * CPUs during the test make them busy.
92 */
93static __init void nmi_cpu_busy(void *data)
94{
95 local_irq_enable_in_hardirq();
96 /*
97 * Intentionally don't use cpu_relax here. This is
98 * to make sure that the performance counter really ticks,
99 * even if there is a simulator or similar that catches the
100 * pause instruction. On a real HT machine this is fine because
101 * all other CPUs are busy with "useless" delay loops and don't
102 * care if they get somewhat fewer cycles.
103 */
104 while (endflag == 0)
105 mb();
106}
107#endif
108
109static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
110{
111 printk(KERN_CONT "\n");
112
113 printk(KERN_WARNING
114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
116
117 printk(KERN_WARNING
118 "Please report this to bugzilla.kernel.org,\n");
119 printk(KERN_WARNING
120 "and attach the output of the 'dmesg' command.\n");
121
122 per_cpu(wd_enabled, cpu) = 0;
123 atomic_dec(&nmi_active);
124}
125
126static void __acpi_nmi_disable(void *__unused)
127{
128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
129}
130
131int __init check_nmi_watchdog(void)
132{
133 unsigned int *prev_nmi_count;
134 int cpu;
135
136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
137 return 0;
138
139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
140 if (!prev_nmi_count)
141 goto error;
142
143 printk(KERN_INFO "Testing NMI watchdog ... ");
144
145#ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148#endif
149
150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154
155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu))
157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count);
160 }
161 endflag = 1;
162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1);
165 goto error;
166 }
167 printk("OK.\n");
168
169 /*
170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs
172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1);
175
176 kfree(prev_nmi_count);
177 return 0;
178error:
179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259)
181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 }
184
185#ifdef CONFIG_X86_32
186 timer_ack = 0;
187#endif
188 return -1;
189}
190
191static int __init setup_nmi_watchdog(char *str)
192{
193 unsigned int nmi;
194
195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1;
197 str = strchr(str, ',');
198 if (!str)
199 return 1;
200 ++str;
201 }
202
203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC;
207 else {
208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID)
210 return 0;
211 nmi_watchdog = nmi;
212 }
213
214 return 1;
215}
216__setup("nmi_watchdog=", setup_nmi_watchdog);
217
218/*
219 * Suspend/resume support
220 */
221#ifdef CONFIG_PM
222
223static int nmi_pm_active; /* nmi_active before suspend */
224
225static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226{
227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0;
232}
233
234static int lapic_nmi_resume(struct sys_device *dev)
235{
236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog();
240 }
241 return 0;
242}
243
244static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend,
248};
249
250static struct sys_device device_lapic_nmi = {
251 .id = 0,
252 .cls = &nmi_sysclass,
253};
254
255static int __init init_lapic_nmi_sysfs(void)
256{
257 int error;
258
259 /*
260 * should really be a BUG_ON but b/c this is an
261 * init call, it just doesn't work. -dcz
262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0;
265
266 if (atomic_read(&nmi_active) < 0)
267 return 0;
268
269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error)
271 error = sysdev_register(&device_lapic_nmi);
272 return error;
273}
274
275/* must come after the local APIC's device_initcall() */
276late_initcall(init_lapic_nmi_sysfs);
277
278#endif /* CONFIG_PM */
279
280static void __acpi_nmi_enable(void *__unused)
281{
282 apic_write(APIC_LVT0, APIC_DM_NMI);
283}
284
285/*
286 * Enable timer based NMIs on all CPUs:
287 */
288void acpi_nmi_enable(void)
289{
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292}
293
294/*
295 * Disable timer based NMIs on all CPUs:
296 */
297void acpi_nmi_disable(void)
298{
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301}
302
303/*
304 * This function is called as soon the LAPIC NMI watchdog driver has everything
305 * in place and it's ready to check if the NMIs belong to the NMI watchdog
306 */
307void cpu_nmi_set_wd_enabled(void)
308{
309 __get_cpu_var(wd_enabled) = 1;
310}
311
312void setup_apic_nmi_watchdog(void *unused)
313{
314 if (__get_cpu_var(wd_enabled))
315 return;
316
317 /* cheap hack to support suspend/resume */
318 /* if cpu0 is not active neither should the other cpus */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return;
321
322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0;
326 return;
327 }
328 /* FALL THROUGH */
329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active);
332 }
333}
334
335void stop_apic_nmi_watchdog(void *unused)
336{
337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active())
339 return;
340 if (__get_cpu_var(wd_enabled) == 0)
341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop();
344 else
345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active);
348}
349
350/*
351 * the best way to detect whether a CPU has a 'hard lockup' problem
352 * is to check its local APIC timer IRQ counts. If they are not
353 * changing then that CPU has some problem.
354 *
355 * as these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor.
357 *
358 * since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ...
361 * [when there will be more tty-related locks, break them up here too!]
362 */
363
364static DEFINE_PER_CPU(unsigned, last_irq_sum);
365static DEFINE_PER_CPU(long, alert_counter);
366static DEFINE_PER_CPU(int, nmi_touch);
367
368void touch_nmi_watchdog(void)
369{
370 if (nmi_watchdog_active()) {
371 unsigned cpu;
372
373 /*
374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not
376 * atomic.
377 */
378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1;
381 }
382 }
383
384 /*
385 * Tickle the softlockup detector too:
386 */
387 touch_softlockup_watchdog();
388}
389EXPORT_SYMBOL(touch_nmi_watchdog);
390
391notrace __kprobes int
392nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393{
394 /*
395 * Since current_thread_info() is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id().
398 */
399 unsigned int sum;
400 int touched = 0;
401 int cpu = smp_processor_id();
402 int rc = 0;
403
404 sum = get_timer_irqs(cpu);
405
406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1;
409 }
410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414
415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs);
418 dump_stack();
419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421
422 rc = 1;
423 }
424
425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress())
427 touched = 1;
428
429 /* if none of the timers is firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /*
432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */
435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens..
439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout);
442 } else {
443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0);
445 }
446
447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled))
449 return rc;
450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz);
453 break;
454 case NMI_IO_APIC:
455 /*
456 * don't know how to accurately check for this.
457 * just assume it was a watchdog timer interrupt
458 * This matches the old behaviour.
459 */
460 rc = 1;
461 break;
462 }
463 return rc;
464}
465
466#ifdef CONFIG_SYSCTL
467
468static void enable_ioapic_nmi_watchdog_single(void *unused)
469{
470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL);
473}
474
475static void enable_ioapic_nmi_watchdog(void)
476{
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog();
479}
480
481static void disable_ioapic_nmi_watchdog(void)
482{
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484}
485
486static int __init setup_unknown_nmi_panic(char *str)
487{
488 unknown_nmi_panic = 1;
489 return 1;
490}
491__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492
493static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
494{
495 unsigned char reason = get_nmi_reason();
496 char buf[64];
497
498 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
499 die_nmi(buf, regs, 1); /* Always panic here */
500 return 0;
501}
502
503/*
504 * proc handler for /proc/sys/kernel/nmi
505 */
506int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos)
508{
509 int old_state;
510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0;
516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n");
520 return -EIO;
521 }
522
523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog();
526 else
527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog();
531 else
532 disable_ioapic_nmi_watchdog();
533 } else {
534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO;
537 }
538 return 0;
539}
540
541#endif /* CONFIG_SYSCTL */
542
543int do_nmi_callback(struct pt_regs *regs, int cpu)
544{
545#ifdef CONFIG_SYSCTL
546 if (unknown_nmi_panic)
547 return unknown_nmi_panic_callback(regs, cpu);
548#endif
549 return 0;
550}
551
552void arch_trigger_all_cpu_backtrace(void)
553{
554 int i;
555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557
558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR);
560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break;
565 mdelay(1);
566 }
567}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda30938..1d59834396bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
894#else 894#else
895 vgetcpu_set_mode(); 895 vgetcpu_set_mode();
896#endif 896#endif
897 init_hw_perf_events();
898} 897}
899 898
900void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 899void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
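Note: with the explicit call removed from identify_boot_cpu(), init_hw_perf_events() presumably runs via an initcall instead, which is consistent with its conversion to return int later in this series, i.e. something like:

early_initcall(init_hw_perf_events);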
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 17ad03366211..9ecf81f9b90f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
149}; 149};
150 150
151struct amd_l3_cache { 151struct amd_l3_cache {
152 struct pci_dev *dev; 152 struct amd_northbridge *nb;
153 bool can_disable;
154 unsigned indices; 153 unsigned indices;
155 u8 subcaches[4]; 154 u8 subcaches[4];
156}; 155};
@@ -311,14 +310,12 @@ struct _cache_attr {
311/* 310/*
312 * L3 cache descriptors 311 * L3 cache descriptors
313 */ 312 */
314static struct amd_l3_cache **__cpuinitdata l3_caches;
315
316static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) 313static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
317{ 314{
318 unsigned int sc0, sc1, sc2, sc3; 315 unsigned int sc0, sc1, sc2, sc3;
319 u32 val = 0; 316 u32 val = 0;
320 317
321 pci_read_config_dword(l3->dev, 0x1C4, &val); 318 pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
322 319
323 /* calculate subcache sizes */ 320 /* calculate subcache sizes */
324 l3->subcaches[0] = sc0 = !(val & BIT(0)); 321 l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
330 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; 327 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
331} 328}
332 329
333static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) 330static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
334{ 331 int index)
335 struct amd_l3_cache *l3;
336 struct pci_dev *dev = node_to_k8_nb_misc(node);
337
338 l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
339 if (!l3) {
340 printk(KERN_WARNING "Error allocating L3 struct\n");
341 return NULL;
342 }
343
344 l3->dev = dev;
345
346 amd_calc_l3_indices(l3);
347
348 return l3;
349}
350
351static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
352 int index)
353{ 332{
333 static struct amd_l3_cache *__cpuinitdata l3_caches;
354 int node; 334 int node;
355 335
356 if (boot_cpu_data.x86 != 0x10) 336 /* only for L3, and not in virtualized environments */
357 return; 337 if (index < 3 || amd_nb_num() == 0)
358
359 if (index < 3)
360 return;
361
362 /* see errata #382 and #388 */
363 if (boot_cpu_data.x86_model < 0x8)
364 return;
365
366 if ((boot_cpu_data.x86_model == 0x8 ||
367 boot_cpu_data.x86_model == 0x9)
368 &&
369 boot_cpu_data.x86_mask < 0x1)
370 return;
371
372 /* not in virtualized environments */
373 if (k8_northbridges.num == 0)
374 return; 338 return;
375 339
376 /* 340 /*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
378 * never freed but this is done only on shutdown so it doesn't matter. 342 * never freed but this is done only on shutdown so it doesn't matter.
379 */ 343 */
380 if (!l3_caches) { 344 if (!l3_caches) {
381 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); 345 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
382 346
383 l3_caches = kzalloc(size, GFP_ATOMIC); 347 l3_caches = kzalloc(size, GFP_ATOMIC);
384 if (!l3_caches) 348 if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
387 351
388 node = amd_get_nb_id(smp_processor_id()); 352 node = amd_get_nb_id(smp_processor_id());
389 353
390 if (!l3_caches[node]) { 354 if (!l3_caches[node].nb) {
391 l3_caches[node] = amd_init_l3_cache(node); 355 l3_caches[node].nb = node_to_amd_nb(node);
392 l3_caches[node]->can_disable = true; 356 amd_calc_l3_indices(&l3_caches[node]);
393 } 357 }
394 358
395 WARN_ON(!l3_caches[node]); 359 this_leaf->l3 = &l3_caches[node];
396
397 this_leaf->l3 = l3_caches[node];
398} 360}
399 361
400/* 362/*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
408{ 370{
409 unsigned int reg = 0; 371 unsigned int reg = 0;
410 372
411 pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg); 373 pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
412 374
413 /* check whether this slot is activated already */ 375 /* check whether this slot is activated already */
414 if (reg & (3UL << 30)) 376 if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
422{ 384{
423 int index; 385 int index;
424 386
425 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 387 if (!this_leaf->l3 ||
388 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
426 return -EINVAL; 389 return -EINVAL;
427 390
428 index = amd_get_l3_disable_slot(this_leaf->l3, slot); 391 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
457 if (!l3->subcaches[i]) 420 if (!l3->subcaches[i])
458 continue; 421 continue;
459 422
460 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 423 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
461 424
462 /* 425 /*
463 * We need to WBINVD on a core on the node containing the L3 426 * We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
467 wbinvd_on_cpu(cpu); 430 wbinvd_on_cpu(cpu);
468 431
469 reg |= BIT(31); 432 reg |= BIT(31);
470 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 433 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
471 } 434 }
472} 435}
473 436
@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
524 if (!capable(CAP_SYS_ADMIN)) 487 if (!capable(CAP_SYS_ADMIN))
525 return -EPERM; 488 return -EPERM;
526 489
527 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 490 if (!this_leaf->l3 ||
491 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
528 return -EINVAL; 492 return -EINVAL;
529 493
530 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 494 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
545#define STORE_CACHE_DISABLE(slot) \ 509#define STORE_CACHE_DISABLE(slot) \
546static ssize_t \ 510static ssize_t \
547store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 511store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
548 const char *buf, size_t count) \ 512 const char *buf, size_t count) \
549{ \ 513{ \
550 return store_cache_disable(this_leaf, buf, count, slot); \ 514 return store_cache_disable(this_leaf, buf, count, slot); \
551} 515}
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
558 show_cache_disable_1, store_cache_disable_1); 522 show_cache_disable_1, store_cache_disable_1);
559 523
560#else /* CONFIG_AMD_NB */ 524#else /* CONFIG_AMD_NB */
561static void __cpuinit 525#define amd_init_l3_cache(x, y)
562amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
563{
564};
565#endif /* CONFIG_AMD_NB */ 526#endif /* CONFIG_AMD_NB */
566 527
567static int 528static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
575 536
576 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 537 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
577 amd_cpuid4(index, &eax, &ebx, &ecx); 538 amd_cpuid4(index, &eax, &ebx, &ecx);
578 amd_check_l3_disable(this_leaf, index); 539 amd_init_l3_cache(this_leaf, index);
579 } else { 540 } else {
580 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 541 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
581 } 542 }
@@ -983,30 +944,48 @@ define_one_ro(size);
983define_one_ro(shared_cpu_map); 944define_one_ro(shared_cpu_map);
984define_one_ro(shared_cpu_list); 945define_one_ro(shared_cpu_list);
985 946
986#define DEFAULT_SYSFS_CACHE_ATTRS \
987 &type.attr, \
988 &level.attr, \
989 &coherency_line_size.attr, \
990 &physical_line_partition.attr, \
991 &ways_of_associativity.attr, \
992 &number_of_sets.attr, \
993 &size.attr, \
994 &shared_cpu_map.attr, \
995 &shared_cpu_list.attr
996
997static struct attribute *default_attrs[] = { 947static struct attribute *default_attrs[] = {
998 DEFAULT_SYSFS_CACHE_ATTRS, 948 &type.attr,
949 &level.attr,
950 &coherency_line_size.attr,
951 &physical_line_partition.attr,
952 &ways_of_associativity.attr,
953 &number_of_sets.attr,
954 &size.attr,
955 &shared_cpu_map.attr,
956 &shared_cpu_list.attr,
999 NULL 957 NULL
1000}; 958};
1001 959
1002static struct attribute *default_l3_attrs[] = {
1003 DEFAULT_SYSFS_CACHE_ATTRS,
1004#ifdef CONFIG_AMD_NB 960#ifdef CONFIG_AMD_NB
1005 &cache_disable_0.attr, 961static struct attribute ** __cpuinit amd_l3_attrs(void)
1006 &cache_disable_1.attr, 962{
963 static struct attribute **attrs;
964 int n;
965
966 if (attrs)
967 return attrs;
968
969 n = sizeof (default_attrs) / sizeof (struct attribute *);
970
971 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
972 n += 2;
973
974 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
975 if (attrs == NULL)
976 return attrs = default_attrs;
977
978 for (n = 0; default_attrs[n]; n++)
979 attrs[n] = default_attrs[n];
980
981 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
982 attrs[n++] = &cache_disable_0.attr;
983 attrs[n++] = &cache_disable_1.attr;
984 }
985
986 return attrs;
987}
1007#endif 988#endif
1008 NULL
1009};
1010 989
1011static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 990static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1012{ 991{
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1117 1096
1118 this_leaf = CPUID4_INFO_IDX(cpu, i); 1097 this_leaf = CPUID4_INFO_IDX(cpu, i);
1119 1098
1120 if (this_leaf->l3 && this_leaf->l3->can_disable) 1099 ktype_cache.default_attrs = default_attrs;
1121 ktype_cache.default_attrs = default_l3_attrs; 1100#ifdef CONFIG_AMD_NB
1122 else 1101 if (this_leaf->l3)
1123 ktype_cache.default_attrs = default_attrs; 1102 ktype_cache.default_attrs = amd_l3_attrs();
1124 1103#endif
1125 retval = kobject_init_and_add(&(this_object->kobj), 1104 retval = kobject_init_and_add(&(this_object->kobj),
1126 &ktype_cache, 1105 &ktype_cache,
1127 per_cpu(ici_cache_kobject, cpu), 1106 per_cpu(ici_cache_kobject, cpu),
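Note: the sizing in amd_l3_attrs() above is subtler than it looks: sizeof(default_attrs) counts the trailing NULL, so the result stays properly terminated. The arithmetic, spelled out as a sketch:

/* default_attrs: 9 attributes + 1 NULL terminator, so n = 10	*/
/* +2 for the cache_disable attrs: kzalloc of 12 pointers	*/
/* the copy loop stops at the NULL, filling slots 0..8		*/
/* cache_disable_0/1 land in slots 9 and 10			*/
/* slot 11 stays zeroed by kzalloc: the NULL terminator		*/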
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 80c482382d5c..5bf2fac52aca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -31,8 +31,6 @@
31#include <asm/mce.h> 31#include <asm/mce.h>
32#include <asm/msr.h> 32#include <asm/msr.h>
33 33
34#define PFX "mce_threshold: "
35#define VERSION "version 1.1.1"
36#define NR_BANKS 6 34#define NR_BANKS 6
37#define NR_BLOCKS 9 35#define NR_BLOCKS 9
38#define THRESHOLD_MAX 0xFFF 36#define THRESHOLD_MAX 0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
59 struct list_head miscj; 57 struct list_head miscj;
60}; 58};
61 59
62/* defaults used early on boot */
63static struct threshold_block threshold_defaults = {
64 .interrupt_enable = 0,
65 .threshold_limit = THRESHOLD_MAX,
66};
67
68struct threshold_bank { 60struct threshold_bank {
69 struct kobject *kobj; 61 struct kobject *kobj;
70 struct threshold_block *blocks; 62 struct threshold_block *blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
89struct thresh_restart { 81struct thresh_restart {
90 struct threshold_block *b; 82 struct threshold_block *b;
91 int reset; 83 int reset;
84 int set_lvt_off;
85 int lvt_off;
92 u16 old_limit; 86 u16 old_limit;
93}; 87};
94 88
89static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
90{
91 int msr = (hi & MASK_LVTOFF_HI) >> 20;
92
93 if (apic < 0) {
94 pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
95 "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
96 b->bank, b->block, b->address, hi, lo);
97 return 0;
98 }
99
100 if (apic != msr) {
101 pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
102 "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
103 b->cpu, apic, b->bank, b->block, b->address, hi, lo);
104 return 0;
105 }
106
107 return 1;
108};
109
95/* must be called with correct cpu affinity */ 110/* must be called with correct cpu affinity */
96/* Called via smp_call_function_single() */ 111/* Called via smp_call_function_single() */
97static void threshold_restart_bank(void *_tr) 112static void threshold_restart_bank(void *_tr)
98{ 113{
99 struct thresh_restart *tr = _tr; 114 struct thresh_restart *tr = _tr;
100 u32 mci_misc_hi, mci_misc_lo; 115 u32 hi, lo;
101 116
102 rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 117 rdmsr(tr->b->address, lo, hi);
103 118
104 if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 119 if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
105 tr->reset = 1; /* limit cannot be lower than err count */ 120 tr->reset = 1; /* limit cannot be lower than err count */
106 121
107 if (tr->reset) { /* reset err count and overflow bit */ 122 if (tr->reset) { /* reset err count and overflow bit */
108 mci_misc_hi = 123 hi =
109 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 124 (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
110 (THRESHOLD_MAX - tr->b->threshold_limit); 125 (THRESHOLD_MAX - tr->b->threshold_limit);
111 } else if (tr->old_limit) { /* change limit w/o reset */ 126 } else if (tr->old_limit) { /* change limit w/o reset */
112 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 127 int new_count = (hi & THRESHOLD_MAX) +
113 (tr->old_limit - tr->b->threshold_limit); 128 (tr->old_limit - tr->b->threshold_limit);
114 129
115 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 130 hi = (hi & ~MASK_ERR_COUNT_HI) |
116 (new_count & THRESHOLD_MAX); 131 (new_count & THRESHOLD_MAX);
117 } 132 }
118 133
134 if (tr->set_lvt_off) {
135 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
136 /* set new lvt offset */
137 hi &= ~MASK_LVTOFF_HI;
138 hi |= tr->lvt_off << 20;
139 }
140 }
141
119 tr->b->interrupt_enable ? 142 tr->b->interrupt_enable ?
120 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 143 (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
121 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 144 (hi &= ~MASK_INT_TYPE_HI);
122 145
123 mci_misc_hi |= MASK_COUNT_EN_HI; 146 hi |= MASK_COUNT_EN_HI;
124 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 147 wrmsr(tr->b->address, lo, hi);
148}
149
150static void mce_threshold_block_init(struct threshold_block *b, int offset)
151{
152 struct thresh_restart tr = {
153 .b = b,
154 .set_lvt_off = 1,
155 .lvt_off = offset,
156 };
157
158 b->threshold_limit = THRESHOLD_MAX;
159 threshold_restart_bank(&tr);
160};
161
162static int setup_APIC_mce(int reserved, int new)
163{
164 if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
165 APIC_EILVT_MSG_FIX, 0))
166 return new;
167
168 return reserved;
125} 169}
126 170
127/* cpu init entry point, called from mce.c with preempt off */ 171/* cpu init entry point, called from mce.c with preempt off */
128void mce_amd_feature_init(struct cpuinfo_x86 *c) 172void mce_amd_feature_init(struct cpuinfo_x86 *c)
129{ 173{
174 struct threshold_block b;
130 unsigned int cpu = smp_processor_id(); 175 unsigned int cpu = smp_processor_id();
131 u32 low = 0, high = 0, address = 0; 176 u32 low = 0, high = 0, address = 0;
132 unsigned int bank, block; 177 unsigned int bank, block;
133 struct thresh_restart tr; 178 int offset = -1;
134 int lvt_off = -1;
135 u8 offset;
136 179
137 for (bank = 0; bank < NR_BANKS; ++bank) { 180 for (bank = 0; bank < NR_BANKS; ++bank) {
138 for (block = 0; block < NR_BLOCKS; ++block) { 181 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
163 if (shared_bank[bank] && c->cpu_core_id) 206 if (shared_bank[bank] && c->cpu_core_id)
164 break; 207 break;
165#endif 208#endif
166 offset = (high & MASK_LVTOFF_HI) >> 20; 209 offset = setup_APIC_mce(offset,
167 if (lvt_off < 0) { 210 (high & MASK_LVTOFF_HI) >> 20);
168 if (setup_APIC_eilvt(offset,
169 THRESHOLD_APIC_VECTOR,
170 APIC_EILVT_MSG_FIX, 0)) {
171 pr_err(FW_BUG "cpu %d, failed to "
172 "setup threshold interrupt "
173 "for bank %d, block %d "
174 "(MSR%08X=0x%x%08x)",
175 smp_processor_id(), bank, block,
176 address, high, low);
177 continue;
178 }
179 lvt_off = offset;
180 } else if (lvt_off != offset) {
181 pr_err(FW_BUG "cpu %d, invalid threshold "
182 "interrupt offset %d for bank %d,"
183 "block %d (MSR%08X=0x%x%08x)",
184 smp_processor_id(), lvt_off, bank,
185 block, address, high, low);
186 continue;
187 }
188
189 high &= ~MASK_LVTOFF_HI;
190 high |= lvt_off << 20;
191 wrmsr(address, low, high);
192 211
193 threshold_defaults.address = address; 212 memset(&b, 0, sizeof(b));
194 tr.b = &threshold_defaults; 213 b.cpu = cpu;
195 tr.reset = 0; 214 b.bank = bank;
196 tr.old_limit = 0; 215 b.block = block;
197 threshold_restart_bank(&tr); 216 b.address = address;
198 217
218 mce_threshold_block_init(&b, offset);
199 mce_threshold_vector = amd_threshold_interrupt; 219 mce_threshold_vector = amd_threshold_interrupt;
200 } 220 }
201 } 221 }
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
298 318
299 b->interrupt_enable = !!new; 319 b->interrupt_enable = !!new;
300 320
321 memset(&tr, 0, sizeof(tr));
301 tr.b = b; 322 tr.b = b;
302 tr.reset = 0;
303 tr.old_limit = 0;
304 323
305 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 324 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
306 325
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
321 if (new < 1) 340 if (new < 1)
322 new = 1; 341 new = 1;
323 342
343 memset(&tr, 0, sizeof(tr));
324 tr.old_limit = b->threshold_limit; 344 tr.old_limit = b->threshold_limit;
325 b->threshold_limit = new; 345 b->threshold_limit = new;
326 tr.b = b; 346 tr.b = b;
327 tr.reset = 0;
328 347
329 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 348 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
330 349
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
603 continue; 622 continue;
604 err = threshold_create_bank(cpu, bank); 623 err = threshold_create_bank(cpu, bank);
605 if (err) 624 if (err)
606 goto out; 625 return err;
607 } 626 }
608out: 627
609 return err; 628 return err;
610} 629}
611 630
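Note: the memset-then-assign sites above exist because struct thresh_restart grew fields that must start out zero. Designated initializers would express the same thing more compactly, e.g. in store_threshold_limit() (a sketch, not part of the patch):

struct thresh_restart tr = {
	.b         = b,
	.old_limit = b->threshold_limit,	/* read before the limit is updated */
	/* reset, set_lvt_off, lvt_off are implicitly zero */
};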
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b9145b13..0a360d146596 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
330{ 330{
331 int i; 331 int i;
332 332
333 if (nmi_watchdog == NMI_LOCAL_APIC)
334 disable_lapic_nmi_watchdog();
335
336 for (i = 0; i < x86_pmu.num_counters; i++) { 333 for (i = 0; i < x86_pmu.num_counters; i++) {
337 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
338 goto perfctr_fail; 335 goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
355 for (i--; i >= 0; i--) 352 for (i--; i >= 0; i--)
356 release_perfctr_nmi(x86_pmu.perfctr + i); 353 release_perfctr_nmi(x86_pmu.perfctr + i);
357 354
358 if (nmi_watchdog == NMI_LOCAL_APIC)
359 enable_lapic_nmi_watchdog();
360
361 return false; 355 return false;
362} 356}
363 357
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
369 release_perfctr_nmi(x86_pmu.perfctr + i); 363 release_perfctr_nmi(x86_pmu.perfctr + i);
370 release_evntsel_nmi(x86_pmu.eventsel + i); 364 release_evntsel_nmi(x86_pmu.eventsel + i);
371 } 365 }
372
373 if (nmi_watchdog == NMI_LOCAL_APIC)
374 enable_lapic_nmi_watchdog();
375} 366}
376 367
377#else 368#else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
384static bool check_hw_exists(void) 375static bool check_hw_exists(void)
385{ 376{
386 u64 val, val_new = 0; 377 u64 val, val_new = 0;
387 int ret = 0; 378 int i, reg, ret = 0;
379
380 /*
381 * Check to see if the BIOS enabled any of the counters, if so
382 * complain and bail.
383 */
384 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i;
386 ret = rdmsrl_safe(reg, &val);
387 if (ret)
388 goto msr_fail;
389 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
390 goto bios_fail;
391 }
388 392
393 if (x86_pmu.num_counters_fixed) {
394 reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
395 ret = rdmsrl_safe(reg, &val);
396 if (ret)
397 goto msr_fail;
398 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
399 if (val & (0x03 << i*4))
400 goto bios_fail;
401 }
402 }
403
404 /*
405 * Now write a value and read it back to see if it matches,
406 * this is needed to detect certain hardware emulators (qemu/kvm)
407 * that don't trap on the MSR access and always return 0s.
408 */
389 val = 0xabcdUL; 409 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val); 410 ret = checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new) 412 if (ret || val != val_new)
393 return false; 413 goto msr_fail;
394 414
395 return true; 415 return true;
416
417bios_fail:
418 printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
419 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
420 return false;
421
422msr_fail:
423 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
424 return false;
396} 425}
397 426
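The new check_hw_exists() logic above probes each event-select MSR with rdmsrl_safe() and refuses the PMU if the BIOS left an enable bit set. A rough user-space analogue of that probe, assuming root and the msr driver loaded (/dev/cpu/*/msr), where pread()'s file offset selects the MSR number:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define IA32_PERFEVTSEL0 0x186          /* eventsel base, arch perfmon */
    #define EVENTSEL_ENABLE  (1ULL << 22)   /* ARCH_PERFMON_EVENTSEL_ENABLE */

    int main(void)
    {
        int fd = open("/dev/cpu/0/msr", O_RDONLY);
        if (fd < 0) { perror("open"); return 1; }

        for (int i = 0; i < 4; i++) {       /* 4 counters on many CPUs */
            uint64_t val;
            /* rdmsrl_safe() analogue: stop quietly if the read faults */
            if (pread(fd, &val, sizeof(val), IA32_PERFEVTSEL0 + i)
                != (ssize_t)sizeof(val))
                break;
            if (val & EVENTSEL_ENABLE)
                printf("counter %d enabled behind the OS's back: %#llx\n",
                       i, (unsigned long long)val);
        }
        close(fd);
        return 0;
    }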
398static void reserve_ds_buffers(void); 427static void reserve_ds_buffers(void);
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
451 struct hw_perf_event *hwc = &event->hw; 480 struct hw_perf_event *hwc = &event->hw;
452 u64 config; 481 u64 config;
453 482
454 if (!hwc->sample_period) { 483 if (!is_sampling_event(event)) {
455 hwc->sample_period = x86_pmu.max_period; 484 hwc->sample_period = x86_pmu.max_period;
456 hwc->last_period = hwc->sample_period; 485 hwc->last_period = hwc->sample_period;
457 local64_set(&hwc->period_left, hwc->sample_period); 486 local64_set(&hwc->period_left, hwc->sample_period);
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void)
1362 pr_info("no hardware sampling interrupt available.\n"); 1391 pr_info("no hardware sampling interrupt available.\n");
1363} 1392}
1364 1393
1365void __init init_hw_perf_events(void) 1394int __init init_hw_perf_events(void)
1366{ 1395{
1367 struct event_constraint *c; 1396 struct event_constraint *c;
1368 int err; 1397 int err;
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void)
1377 err = amd_pmu_init(); 1406 err = amd_pmu_init();
1378 break; 1407 break;
1379 default: 1408 default:
1380 return; 1409 return 0;
1381 } 1410 }
1382 if (err != 0) { 1411 if (err != 0) {
1383 pr_cont("no PMU driver, software events only.\n"); 1412 pr_cont("no PMU driver, software events only.\n");
1384 return; 1413 return 0;
1385 } 1414 }
1386 1415
1387 pmu_check_apic(); 1416 pmu_check_apic();
1388 1417
1389 /* sanity check that the hardware exists or is emulated */ 1418 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) { 1419 if (!check_hw_exists())
1391 pr_cont("Broken PMU hardware detected, software events only.\n"); 1420 return 0;
1392 return;
1393 }
1394 1421
1395 pr_cont("%s PMU driver.\n", x86_pmu.name); 1422 pr_cont("%s PMU driver.\n", x86_pmu.name);
1396 1423
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void)
1438 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1465 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1439 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1466 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1440 1467
1441 perf_pmu_register(&pmu); 1468 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1442 perf_cpu_notifier(x86_pmu_notifier); 1469 perf_cpu_notifier(x86_pmu_notifier);
1470
1471 return 0;
1443} 1472}
1473early_initcall(init_hw_perf_events);
1444 1474
1445static inline void x86_pmu_read(struct perf_event *event) 1475static inline void x86_pmu_read(struct perf_event *event)
1446{ 1476{
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1686 1716
1687 perf_callchain_store(entry, regs->ip); 1717 perf_callchain_store(entry, regs->ip);
1688 1718
1689 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1719 dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
1690} 1720}
1691 1721
1692#ifdef CONFIG_COMPAT 1722#ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index e421b8cd6944..67e2202a6039 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,7 +1,5 @@
1#ifdef CONFIG_CPU_SUP_AMD 1#ifdef CONFIG_CPU_SUP_AMD
2 2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst const u64 amd_hw_cache_event_ids 3static __initconst const u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX] 4 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX] 5 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
275 return &emptyconstraint; 273 return &emptyconstraint;
276} 274}
277 275
278static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) 276static struct amd_nb *amd_alloc_nb(int cpu)
279{ 277{
280 struct amd_nb *nb; 278 struct amd_nb *nb;
281 int i; 279 int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
285 if (!nb) 283 if (!nb)
286 return NULL; 284 return NULL;
287 285
288 nb->nb_id = nb_id; 286 nb->nb_id = -1;
289 287
290 /* 288 /*
291 * initialize all possible NB constraints 289 * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
306 if (boot_cpu_data.x86_max_cores < 2) 304 if (boot_cpu_data.x86_max_cores < 2)
307 return NOTIFY_OK; 305 return NOTIFY_OK;
308 306
309 cpuc->amd_nb = amd_alloc_nb(cpu, -1); 307 cpuc->amd_nb = amd_alloc_nb(cpu);
310 if (!cpuc->amd_nb) 308 if (!cpuc->amd_nb)
311 return NOTIFY_BAD; 309 return NOTIFY_BAD;
312 310
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
325 nb_id = amd_get_nb_id(cpu); 323 nb_id = amd_get_nb_id(cpu);
326 WARN_ON_ONCE(nb_id == BAD_APICID); 324 WARN_ON_ONCE(nb_id == BAD_APICID);
327 325
328 raw_spin_lock(&amd_nb_lock);
329
330 for_each_online_cpu(i) { 326 for_each_online_cpu(i) {
331 nb = per_cpu(cpu_hw_events, i).amd_nb; 327 nb = per_cpu(cpu_hw_events, i).amd_nb;
332 if (WARN_ON_ONCE(!nb)) 328 if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
341 337
342 cpuc->amd_nb->nb_id = nb_id; 338 cpuc->amd_nb->nb_id = nb_id;
343 cpuc->amd_nb->refcnt++; 339 cpuc->amd_nb->refcnt++;
344
345 raw_spin_unlock(&amd_nb_lock);
346} 340}
347 341
348static void amd_pmu_cpu_dead(int cpu) 342static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
354 348
355 cpuhw = &per_cpu(cpu_hw_events, cpu); 349 cpuhw = &per_cpu(cpu_hw_events, cpu);
356 350
357 raw_spin_lock(&amd_nb_lock);
358
359 if (cpuhw->amd_nb) { 351 if (cpuhw->amd_nb) {
360 struct amd_nb *nb = cpuhw->amd_nb; 352 struct amd_nb *nb = cpuhw->amd_nb;
361 353
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
364 356
365 cpuhw->amd_nb = NULL; 357 cpuhw->amd_nb = NULL;
366 } 358 }
367
368 raw_spin_unlock(&amd_nb_lock);
369} 359}
370 360
371static __initconst const struct x86_pmu amd_pmu = { 361static __initconst const struct x86_pmu amd_pmu = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c088cad1..24e390e40f2e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
816 if (ret) 816 if (ret)
817 return ret; 817 return ret;
818 818
819 if (event->attr.precise_ip &&
820 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
821 /*
822 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
823 * (0x003c) so that we can use it with PEBS.
824 *
825 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
826 * PEBS capable. However we can use INST_RETIRED.ANY_P
827 * (0x00c0), which is a PEBS capable event, to get the same
828 * count.
829 *
830 * INST_RETIRED.ANY_P counts the number of cycles that retires
831 * CNTMASK instructions. By setting CNTMASK to a value (16)
832 * larger than the maximum number of instructions that can be
833 * retired per cycle (4) and then inverting the condition, we
834 * count all cycles that retire 16 or less instructions, which
835 * is every cycle.
836 *
837 * Thereby we gain a PEBS capable cycle counter.
838 */
839 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
840
841 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
842 event->hw.config = alt_config;
843 }
844
819 if (event->attr.type != PERF_TYPE_RAW) 845 if (event->attr.type != PERF_TYPE_RAW)
820 return 0; 846 return 0;
821 847
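The magic constant in the hunk above follows directly from the bit layout the comment describes: event 0xc0 (INST_RETIRED.ANY_P), a counter mask of 16 in bits 31:24, and the invert bit at position 23. A quick self-check that those fields really compose to 0x108000c0, using the architectural event-select field positions:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t event = 0xc0;            /* INST_RETIRED.ANY_P */
        uint64_t umask = 0x00ULL << 8;    /* no unit-mask qualifier */
        uint64_t inv   = 1ULL << 23;      /* invert the cmask comparison */
        uint64_t cmask = 16ULL << 24;     /* > max 4 insns retired/cycle */

        assert((event | umask | inv | cmask) == 0x108000c0ULL);
        return 0;
    }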
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8fcd69..d5a236615501 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -16,32 +16,12 @@
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/nmi.h> 19#include <asm/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/perf_event.h> 23#include <asm/perf_event.h>
24 24
25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr;
27 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
28 unsigned int evntsel_msr; /* the MSR to select the events to handle */
29};
30
31/* Interface defining a CPU specific perfctr watchdog */
32struct wd_ops {
33 int (*reserve)(void);
34 void (*unreserve)(void);
35 int (*setup)(unsigned nmi_hz);
36 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
37 void (*stop)(void);
38 unsigned perfctr;
39 unsigned evntsel;
40 u64 checkbit;
41};
42
43static const struct wd_ops *wd_ops;
44
45/* 25/*
46 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 26 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
47 * offset from MSR_P4_BSU_ESCR0. 27 * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
60static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); 40static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
61static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); 41static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
62 42
63static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
64
65/* converts an msr to an appropriate reservation bit */ 43/* converts an msr to an appropriate reservation bit */
66static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) 44static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
67{ 45{
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
172 clear_bit(counter, evntsel_nmi_owner); 150 clear_bit(counter, evntsel_nmi_owner);
173} 151}
174EXPORT_SYMBOL(release_evntsel_nmi); 152EXPORT_SYMBOL(release_evntsel_nmi);
175
176void disable_lapic_nmi_watchdog(void)
177{
178 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
179
180 if (atomic_read(&nmi_active) <= 0)
181 return;
182
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184
185 if (wd_ops)
186 wd_ops->unreserve();
187
188 BUG_ON(atomic_read(&nmi_active) != 0);
189}
190
191void enable_lapic_nmi_watchdog(void)
192{
193 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
194
195 /* are we already enabled */
196 if (atomic_read(&nmi_active) != 0)
197 return;
198
199 /* are we lapic aware */
200 if (!wd_ops)
201 return;
202 if (!wd_ops->reserve()) {
203 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
204 return;
205 }
206
207 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
208 touch_nmi_watchdog();
209}
210
211/*
212 * Activate the NMI watchdog via the local APIC.
213 */
214
215static unsigned int adjust_for_32bit_ctr(unsigned int hz)
216{
217 u64 counter_val;
218 unsigned int retval = hz;
219
220 /*
221 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
222 * are writable, with higher bits sign extending from bit 31.
223 * So, we can only program the counter with 31 bit values and
224 * 32nd bit should be 1, for 33.. to be 1.
225 * Find the appropriate nmi_hz
226 */
227 counter_val = (u64)cpu_khz * 1000;
228 do_div(counter_val, retval);
229 if (counter_val > 0x7fffffffULL) {
230 u64 count = (u64)cpu_khz * 1000;
231 do_div(count, 0x7fffffffUL);
232 retval = count + 1;
233 }
234 return retval;
235}
236
237static void write_watchdog_counter(unsigned int perfctr_msr,
238 const char *descr, unsigned nmi_hz)
239{
240 u64 count = (u64)cpu_khz * 1000;
241
242 do_div(count, nmi_hz);
243 if (descr)
244 pr_debug("setting %s to -0x%08Lx\n", descr, count);
245 wrmsrl(perfctr_msr, 0 - count);
246}
247
248static void write_watchdog_counter32(unsigned int perfctr_msr,
249 const char *descr, unsigned nmi_hz)
250{
251 u64 count = (u64)cpu_khz * 1000;
252
253 do_div(count, nmi_hz);
254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsr(perfctr_msr, (u32)(-count), 0);
257}
258
259/*
260 * AMD K7/K8/Family10h/Family11h support.
261 * AMD keeps this interface nicely stable so there is not much variety
262 */
263#define K7_EVNTSEL_ENABLE (1 << 22)
264#define K7_EVNTSEL_INT (1 << 20)
265#define K7_EVNTSEL_OS (1 << 17)
266#define K7_EVNTSEL_USR (1 << 16)
267#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
268#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
269
270static int setup_k7_watchdog(unsigned nmi_hz)
271{
272 unsigned int perfctr_msr, evntsel_msr;
273 unsigned int evntsel;
274 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
275
276 perfctr_msr = wd_ops->perfctr;
277 evntsel_msr = wd_ops->evntsel;
278
279 wrmsrl(perfctr_msr, 0UL);
280
281 evntsel = K7_EVNTSEL_INT
282 | K7_EVNTSEL_OS
283 | K7_EVNTSEL_USR
284 | K7_NMI_EVENT;
285
286 /* setup the timer */
287 wrmsr(evntsel_msr, evntsel, 0);
288 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
289
290 /* initialize the wd struct before enabling */
291 wd->perfctr_msr = perfctr_msr;
292 wd->evntsel_msr = evntsel_msr;
293 wd->cccr_msr = 0; /* unused */
294
295 /* ok, everything is initialized, announce that we're set */
296 cpu_nmi_set_wd_enabled();
297
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301
302 return 1;
303}
304
305static void single_msr_stop_watchdog(void)
306{
307 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
308
309 wrmsr(wd->evntsel_msr, 0, 0);
310}
311
312static int single_msr_reserve(void)
313{
314 if (!reserve_perfctr_nmi(wd_ops->perfctr))
315 return 0;
316
317 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
318 release_perfctr_nmi(wd_ops->perfctr);
319 return 0;
320 }
321 return 1;
322}
323
324static void single_msr_unreserve(void)
325{
326 release_evntsel_nmi(wd_ops->evntsel);
327 release_perfctr_nmi(wd_ops->perfctr);
328}
329
330static void __kprobes
331single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
332{
333 /* start the cycle over again */
334 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
335}
336
337static const struct wd_ops k7_wd_ops = {
338 .reserve = single_msr_reserve,
339 .unreserve = single_msr_unreserve,
340 .setup = setup_k7_watchdog,
341 .rearm = single_msr_rearm,
342 .stop = single_msr_stop_watchdog,
343 .perfctr = MSR_K7_PERFCTR0,
344 .evntsel = MSR_K7_EVNTSEL0,
345 .checkbit = 1ULL << 47,
346};
347
348/*
349 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
350 */
351#define P6_EVNTSEL0_ENABLE (1 << 22)
352#define P6_EVNTSEL_INT (1 << 20)
353#define P6_EVNTSEL_OS (1 << 17)
354#define P6_EVNTSEL_USR (1 << 16)
355#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
356#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
357
358static int setup_p6_watchdog(unsigned nmi_hz)
359{
360 unsigned int perfctr_msr, evntsel_msr;
361 unsigned int evntsel;
362 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
363
364 perfctr_msr = wd_ops->perfctr;
365 evntsel_msr = wd_ops->evntsel;
366
367 /* KVM doesn't implement this MSR */
368 if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
369 return 0;
370
371 evntsel = P6_EVNTSEL_INT
372 | P6_EVNTSEL_OS
373 | P6_EVNTSEL_USR
374 | P6_NMI_EVENT;
375
376 /* setup the timer */
377 wrmsr(evntsel_msr, evntsel, 0);
378 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
379 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
380
381 /* initialize the wd struct before enabling */
382 wd->perfctr_msr = perfctr_msr;
383 wd->evntsel_msr = evntsel_msr;
384 wd->cccr_msr = 0; /* unused */
385
386 /* ok, everything is initialized, announce that we're set */
387 cpu_nmi_set_wd_enabled();
388
389 apic_write(APIC_LVTPC, APIC_DM_NMI);
390 evntsel |= P6_EVNTSEL0_ENABLE;
391 wrmsr(evntsel_msr, evntsel, 0);
392
393 return 1;
394}
395
396static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
397{
398 /*
399 * P6 based Pentium M need to re-unmask
400 * the apic vector but it doesn't hurt
401 * other P6 variant.
402 * ArchPerfom/Core Duo also needs this
403 */
404 apic_write(APIC_LVTPC, APIC_DM_NMI);
405
406 /* P6/ARCH_PERFMON has 32 bit counter write */
407 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
408}
409
410static const struct wd_ops p6_wd_ops = {
411 .reserve = single_msr_reserve,
412 .unreserve = single_msr_unreserve,
413 .setup = setup_p6_watchdog,
414 .rearm = p6_rearm,
415 .stop = single_msr_stop_watchdog,
416 .perfctr = MSR_P6_PERFCTR0,
417 .evntsel = MSR_P6_EVNTSEL0,
418 .checkbit = 1ULL << 39,
419};
420
421/*
422 * Intel P4 performance counters.
423 * By far the most complicated of all.
424 */
425#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
426#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
427#define P4_ESCR_OS (1 << 3)
428#define P4_ESCR_USR (1 << 2)
429#define P4_CCCR_OVF_PMI0 (1 << 26)
430#define P4_CCCR_OVF_PMI1 (1 << 27)
431#define P4_CCCR_THRESHOLD(N) ((N) << 20)
432#define P4_CCCR_COMPLEMENT (1 << 19)
433#define P4_CCCR_COMPARE (1 << 18)
434#define P4_CCCR_REQUIRED (3 << 16)
435#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
436#define P4_CCCR_ENABLE (1 << 12)
437#define P4_CCCR_OVF (1 << 31)
438
439#define P4_CONTROLS 18
440static unsigned int p4_controls[18] = {
441 MSR_P4_BPU_CCCR0,
442 MSR_P4_BPU_CCCR1,
443 MSR_P4_BPU_CCCR2,
444 MSR_P4_BPU_CCCR3,
445 MSR_P4_MS_CCCR0,
446 MSR_P4_MS_CCCR1,
447 MSR_P4_MS_CCCR2,
448 MSR_P4_MS_CCCR3,
449 MSR_P4_FLAME_CCCR0,
450 MSR_P4_FLAME_CCCR1,
451 MSR_P4_FLAME_CCCR2,
452 MSR_P4_FLAME_CCCR3,
453 MSR_P4_IQ_CCCR0,
454 MSR_P4_IQ_CCCR1,
455 MSR_P4_IQ_CCCR2,
456 MSR_P4_IQ_CCCR3,
457 MSR_P4_IQ_CCCR4,
458 MSR_P4_IQ_CCCR5,
459};
460/*
461 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
462 * CRU_ESCR0 (with any non-null event selector) through a complemented
463 * max threshold. [IA32-Vol3, Section 14.9.9]
464 */
465static int setup_p4_watchdog(unsigned nmi_hz)
466{
467 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
468 unsigned int evntsel, cccr_val;
469 unsigned int misc_enable, dummy;
470 unsigned int ht_num;
471 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
472
473 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
474 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
475 return 0;
476
477#ifdef CONFIG_SMP
478 /* detect which hyperthread we are on */
479 if (smp_num_siblings == 2) {
480 unsigned int ebx, apicid;
481
482 ebx = cpuid_ebx(1);
483 apicid = (ebx >> 24) & 0xff;
484 ht_num = apicid & 1;
485 } else
486#endif
487 ht_num = 0;
488
489 /*
490 * performance counters are shared resources
491 * assign each hyperthread its own set
492 * (re-use the ESCR0 register, seems safe
493 * and keeps the cccr_val the same)
494 */
495 if (!ht_num) {
496 /* logical cpu 0 */
497 perfctr_msr = MSR_P4_IQ_PERFCTR0;
498 evntsel_msr = MSR_P4_CRU_ESCR0;
499 cccr_msr = MSR_P4_IQ_CCCR0;
500 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
501
502 /*
503 * If we're on the kdump kernel or other situation, we may
504 * still have other performance counter registers set to
505 * interrupt and they'll keep interrupting forever because
506 * of the P4_CCCR_OVF quirk. So we need to ACK all the
507 * pending interrupts and disable all the registers here,
508 * before reenabling the NMI delivery. Refer to p4_rearm()
509 * about the P4_CCCR_OVF quirk.
510 */
511 if (reset_devices) {
512 unsigned int low, high;
513 int i;
514
515 for (i = 0; i < P4_CONTROLS; i++) {
516 rdmsr(p4_controls[i], low, high);
517 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
518 wrmsr(p4_controls[i], low, high);
519 }
520 }
521 } else {
522 /* logical cpu 1 */
523 perfctr_msr = MSR_P4_IQ_PERFCTR1;
524 evntsel_msr = MSR_P4_CRU_ESCR0;
525 cccr_msr = MSR_P4_IQ_CCCR1;
526
527 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
528 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
529 cccr_val = P4_CCCR_OVF_PMI0;
530 else
531 cccr_val = P4_CCCR_OVF_PMI1;
532 cccr_val |= P4_CCCR_ESCR_SELECT(4);
533 }
534
535 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
536 | P4_ESCR_OS
537 | P4_ESCR_USR;
538
539 cccr_val |= P4_CCCR_THRESHOLD(15)
540 | P4_CCCR_COMPLEMENT
541 | P4_CCCR_COMPARE
542 | P4_CCCR_REQUIRED;
543
544 wrmsr(evntsel_msr, evntsel, 0);
545 wrmsr(cccr_msr, cccr_val, 0);
546 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
547
548 wd->perfctr_msr = perfctr_msr;
549 wd->evntsel_msr = evntsel_msr;
550 wd->cccr_msr = cccr_msr;
551
552 /* ok, everything is initialized, announce that we're set */
553 cpu_nmi_set_wd_enabled();
554
555 apic_write(APIC_LVTPC, APIC_DM_NMI);
556 cccr_val |= P4_CCCR_ENABLE;
557 wrmsr(cccr_msr, cccr_val, 0);
558 return 1;
559}
560
561static void stop_p4_watchdog(void)
562{
563 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
564 wrmsr(wd->cccr_msr, 0, 0);
565 wrmsr(wd->evntsel_msr, 0, 0);
566}
567
568static int p4_reserve(void)
569{
570 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
571 return 0;
572#ifdef CONFIG_SMP
573 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
574 goto fail1;
575#endif
576 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
577 goto fail2;
578 /* RED-PEN why is ESCR1 not reserved here? */
579 return 1;
580 fail2:
581#ifdef CONFIG_SMP
582 if (smp_num_siblings > 1)
583 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
584 fail1:
585#endif
586 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
587 return 0;
588}
589
590static void p4_unreserve(void)
591{
592#ifdef CONFIG_SMP
593 if (smp_num_siblings > 1)
594 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
595#endif
596 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
597 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
598}
599
600static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
601{
602 unsigned dummy;
603 /*
604 * P4 quirks:
605 * - An overflown perfctr will assert its interrupt
606 * until the OVF flag in its CCCR is cleared.
607 * - LVTPC is masked on interrupt and must be
608 * unmasked by the LVTPC handler.
609 */
610 rdmsrl(wd->cccr_msr, dummy);
611 dummy &= ~P4_CCCR_OVF;
612 wrmsrl(wd->cccr_msr, dummy);
613 apic_write(APIC_LVTPC, APIC_DM_NMI);
614 /* start the cycle over again */
615 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
616}
617
618static const struct wd_ops p4_wd_ops = {
619 .reserve = p4_reserve,
620 .unreserve = p4_unreserve,
621 .setup = setup_p4_watchdog,
622 .rearm = p4_rearm,
623 .stop = stop_p4_watchdog,
624 /* RED-PEN this is wrong for the other sibling */
625 .perfctr = MSR_P4_BPU_PERFCTR0,
626 .evntsel = MSR_P4_BSU_ESCR0,
627 .checkbit = 1ULL << 39,
628};
629
630/*
631 * Watchdog using the Intel architected PerfMon.
632 * Used for Core2 and hopefully all future Intel CPUs.
633 */
634#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
635#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
636
637static struct wd_ops intel_arch_wd_ops;
638
639static int setup_intel_arch_watchdog(unsigned nmi_hz)
640{
641 unsigned int ebx;
642 union cpuid10_eax eax;
643 unsigned int unused;
644 unsigned int perfctr_msr, evntsel_msr;
645 unsigned int evntsel;
646 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
647
648 /*
649 * Check whether the Architectural PerfMon supports
650 * Unhalted Core Cycles Event or not.
651 * NOTE: Corresponding bit = 0 in ebx indicates event present.
652 */
653 cpuid(10, &(eax.full), &ebx, &unused, &unused);
654 if ((eax.split.mask_length <
655 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
656 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
657 return 0;
658
659 perfctr_msr = wd_ops->perfctr;
660 evntsel_msr = wd_ops->evntsel;
661
662 wrmsrl(perfctr_msr, 0UL);
663
664 evntsel = ARCH_PERFMON_EVENTSEL_INT
665 | ARCH_PERFMON_EVENTSEL_OS
666 | ARCH_PERFMON_EVENTSEL_USR
667 | ARCH_PERFMON_NMI_EVENT_SEL
668 | ARCH_PERFMON_NMI_EVENT_UMASK;
669
670 /* setup the timer */
671 wrmsr(evntsel_msr, evntsel, 0);
672 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
673 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
674
675 wd->perfctr_msr = perfctr_msr;
676 wd->evntsel_msr = evntsel_msr;
677 wd->cccr_msr = 0; /* unused */
678
679 /* ok, everything is initialized, announce that we're set */
680 cpu_nmi_set_wd_enabled();
681
682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1;
687}
688
689static struct wd_ops intel_arch_wd_ops __read_mostly = {
690 .reserve = single_msr_reserve,
691 .unreserve = single_msr_unreserve,
692 .setup = setup_intel_arch_watchdog,
693 .rearm = p6_rearm,
694 .stop = single_msr_stop_watchdog,
695 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
696 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
697};
698
699static void probe_nmi_watchdog(void)
700{
701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 == 6 ||
704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 wd_ops = &k7_wd_ops;
706 return;
707 case X86_VENDOR_INTEL:
708 /* Work around where perfctr1 doesn't have a working enable
709 * bit as described in the following errata:
710 * AE49 Core Duo and Intel Core Solo 65 nm
711 * AN49 Intel Pentium Dual-Core
712 * AF49 Dual-Core Intel Xeon Processor LV
713 */
714 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
715 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
716 boot_cpu_data.x86_mask == 4))) {
717 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
718 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
719 }
720 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
721 wd_ops = &intel_arch_wd_ops;
722 break;
723 }
724 switch (boot_cpu_data.x86) {
725 case 6:
726 if (boot_cpu_data.x86_model > 13)
727 return;
728
729 wd_ops = &p6_wd_ops;
730 break;
731 case 15:
732 wd_ops = &p4_wd_ops;
733 break;
734 default:
735 return;
736 }
737 break;
738 }
739}
740
741/* Interface to nmi.c */
742
743int lapic_watchdog_init(unsigned nmi_hz)
744{
745 if (!wd_ops) {
746 probe_nmi_watchdog();
747 if (!wd_ops) {
748 printk(KERN_INFO "NMI watchdog: CPU not supported\n");
749 return -1;
750 }
751
752 if (!wd_ops->reserve()) {
753 printk(KERN_ERR
754 "NMI watchdog: cannot reserve perfctrs\n");
755 return -1;
756 }
757 }
758
759 if (!(wd_ops->setup(nmi_hz))) {
760 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
761 raw_smp_processor_id());
762 return -1;
763 }
764
765 return 0;
766}
767
768void lapic_watchdog_stop(void)
769{
770 if (wd_ops)
771 wd_ops->stop();
772}
773
774unsigned lapic_adjust_nmi_hz(unsigned hz)
775{
776 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
777 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
778 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
779 hz = adjust_for_32bit_ctr(hz);
780 return hz;
781}
782
783int __kprobes lapic_wd_event(unsigned nmi_hz)
784{
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 ctr;
787
788 rdmsrl(wd->perfctr_msr, ctr);
789 if (ctr & wd_ops->checkbit) /* perfctr still running? */
790 return 0;
791
792 wd_ops->rearm(wd, nmi_hz);
793 return 1;
794}
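lapic_wd_event() in the removed block above relies on the counter being programmed to -count and counting up: the counter's top bit (wd_ops->checkbit) stays set until the counter wraps past zero and fires the NMI, so "top bit set" means "hasn't fired yet". A toy 32-bit version of that test:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t checkbit = 1u << 31;
        uint32_t ctr = (uint32_t)-1000;   /* write_watchdog_counter(-count) */

        for (int tick = 0; tick < 1500; tick++) {
            ctr++;                        /* one event occurs */
            if (!(ctr & checkbit)) {      /* lapic_wd_event()'s test */
                printf("overflow after %d ticks, rearm\n", tick + 1);
                break;                    /* prints: after 1000 ticks */
            }
        }
        return 0;
    }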
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c1bd52..8474c998cbd4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
175 175
176void 176void
177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
178 unsigned long *stack, unsigned long bp, char *log_lvl) 178 unsigned long *stack, char *log_lvl)
179{ 179{
180 printk("%sCall Trace:\n", log_lvl); 180 printk("%sCall Trace:\n", log_lvl);
181 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 181 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
182} 182}
183 183
184void show_trace(struct task_struct *task, struct pt_regs *regs, 184void show_trace(struct task_struct *task, struct pt_regs *regs,
185 unsigned long *stack, unsigned long bp) 185 unsigned long *stack)
186{ 186{
187 show_trace_log_lvl(task, regs, stack, bp, ""); 187 show_trace_log_lvl(task, regs, stack, "");
188} 188}
189 189
190void show_stack(struct task_struct *task, unsigned long *sp) 190void show_stack(struct task_struct *task, unsigned long *sp)
191{ 191{
192 show_stack_log_lvl(task, NULL, sp, 0, ""); 192 show_stack_log_lvl(task, NULL, sp, "");
193} 193}
194 194
195/* 195/*
@@ -210,7 +210,7 @@ void dump_stack(void)
210 init_utsname()->release, 210 init_utsname()->release,
211 (int)strcspn(init_utsname()->version, " "), 211 (int)strcspn(init_utsname()->version, " "),
212 init_utsname()->version); 212 init_utsname()->version);
213 show_trace(NULL, NULL, &stack, bp); 213 show_trace(NULL, NULL, &stack);
214} 214}
215EXPORT_SYMBOL(dump_stack); 215EXPORT_SYMBOL(dump_stack);
216 216
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75a5bda..74cc1eda384b 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19 19
20void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task,
21 unsigned long *stack, unsigned long bp, 21 struct pt_regs *regs, unsigned long *stack,
22 const struct stacktrace_ops *ops, void *data) 22 const struct stacktrace_ops *ops, void *data)
23{ 23{
24 int graph = 0; 24 int graph = 0;
25 unsigned long bp;
25 26
26 if (!task) 27 if (!task)
27 task = current; 28 task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
34 stack = (unsigned long *)task->thread.sp; 35 stack = (unsigned long *)task->thread.sp;
35 } 36 }
36 37
37#ifdef CONFIG_FRAME_POINTER 38 bp = stack_frame(task, regs);
38 if (!bp) {
39 if (task == current) {
40 /* Grab bp right from our regs */
41 get_bp(bp);
42 } else {
43 /* bp is the last reg pushed by switch_to */
44 bp = *(unsigned long *) task->thread.sp;
45 }
46 }
47#endif
48
49 for (;;) { 39 for (;;) {
50 struct thread_info *context; 40 struct thread_info *context;
51 41
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
65 55
66void 56void
67show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 57show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
68 unsigned long *sp, unsigned long bp, char *log_lvl) 58 unsigned long *sp, char *log_lvl)
69{ 59{
70 unsigned long *stack; 60 unsigned long *stack;
71 int i; 61 int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
87 touch_nmi_watchdog(); 77 touch_nmi_watchdog();
88 } 78 }
89 printk(KERN_CONT "\n"); 79 printk(KERN_CONT "\n");
90 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 80 show_trace_log_lvl(task, regs, sp, log_lvl);
91} 81}
92 82
93 83
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
112 u8 *ip; 102 u8 *ip;
113 103
114 printk(KERN_EMERG "Stack:\n"); 104 printk(KERN_EMERG "Stack:\n");
115 show_stack_log_lvl(NULL, regs, &regs->sp, 105 show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
116 0, KERN_EMERG);
117 106
118 printk(KERN_EMERG "Code: "); 107 printk(KERN_EMERG "Code: ");
119 108
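Both dumpstack hunks drop the bp argument in favour of a stack_frame(task, regs) helper; the removed #ifdef CONFIG_FRAME_POINTER block shows the three cases it has to cover. A user-space sketch of that selection logic (illustrative only; the kernel's actual helper lives in asm/stacktrace.h):

    #include <stdint.h>

    struct regs { uintptr_t bp; };        /* stand-in for struct pt_regs */

    uintptr_t stack_frame(const struct regs *regs, int is_current,
                          const uintptr_t *saved_sp)
    {
        if (regs)
            return regs->bp;              /* trap frame is available */
        if (is_current)                   /* "get_bp()" on ourselves */
            return (uintptr_t)__builtin_frame_address(0);
        /* bp is the last reg pushed by switch_to */
        return *saved_sp;
    }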
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a340485249a..64101335de19 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
140 */ 140 */
141 141
142void dump_trace(struct task_struct *task, struct pt_regs *regs, 142void dump_trace(struct task_struct *task,
143 unsigned long *stack, unsigned long bp, 143 struct pt_regs *regs, unsigned long *stack,
144 const struct stacktrace_ops *ops, void *data) 144 const struct stacktrace_ops *ops, void *data)
145{ 145{
146 const unsigned cpu = get_cpu(); 146 const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
149 unsigned used = 0; 149 unsigned used = 0;
150 struct thread_info *tinfo; 150 struct thread_info *tinfo;
151 int graph = 0; 151 int graph = 0;
152 unsigned long bp;
152 153
153 if (!task) 154 if (!task)
154 task = current; 155 task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *)task->thread.sp; 161 stack = (unsigned long *)task->thread.sp;
161 } 162 }
162 163
163#ifdef CONFIG_FRAME_POINTER 164 bp = stack_frame(task, regs);
164 if (!bp) {
165 if (task == current) {
166 /* Grab bp right from our regs */
167 get_bp(bp);
168 } else {
169 /* bp is the last reg pushed by switch_to */
170 bp = *(unsigned long *) task->thread.sp;
171 }
172 }
173#endif
174
175 /* 165 /*
176 * Print function call entries in all stacks, starting at the 166 * Print function call entries in all stacks, starting at the
177 * current stack address. If the stacks consist of nested 167 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
235 225
236void 226void
237show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 227show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
238 unsigned long *sp, unsigned long bp, char *log_lvl) 228 unsigned long *sp, char *log_lvl)
239{ 229{
240 unsigned long *irq_stack_end; 230 unsigned long *irq_stack_end;
241 unsigned long *irq_stack; 231 unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
279 preempt_enable(); 269 preempt_enable();
280 270
281 printk(KERN_CONT "\n"); 271 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 272 show_trace_log_lvl(task, regs, sp, log_lvl);
283} 273}
284 274
285void show_registers(struct pt_regs *regs) 275void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
308 298
309 printk(KERN_EMERG "Stack:\n"); 299 printk(KERN_EMERG "Stack:\n");
310 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 300 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
311 regs->bp, KERN_EMERG); 301 KERN_EMERG);
312 302
313 printk(KERN_EMERG "Code: "); 303 printk(KERN_EMERG "Code: ");
314 304
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 4572f25f9325..cd28a350f7f9 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
240 if (!strncmp(buf, "xen", 3)) 240 if (!strncmp(buf, "xen", 3))
241 early_console_register(&xenboot_console, keep); 241 early_console_register(&xenboot_console, keep);
242#endif 242#endif
243#ifdef CONFIG_X86_MRST_EARLY_PRINTK 243#ifdef CONFIG_EARLY_PRINTK_MRST
244 if (!strncmp(buf, "mrst", 4)) { 244 if (!strncmp(buf, "mrst", 4)) {
245 mrst_early_console_init(); 245 mrst_early_console_init();
246 early_console_register(&early_mrst_console, keep); 246 early_console_register(&early_mrst_console, keep);
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf)
250 hsu_early_console_init(); 250 hsu_early_console_init();
251 early_console_register(&early_hsu_console, keep); 251 early_console_register(&early_hsu_console, keep);
252 } 252 }
253
254#endif 253#endif
255 buf++; 254 buf++;
256 } 255 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3afb33f14d2d..298448656b60 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -19,6 +19,7 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/module.h>
22 23
23#include <trace/syscall.h> 24#include <trace/syscall.h>
24 25
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
49int ftrace_arch_code_modify_prepare(void) 50int ftrace_arch_code_modify_prepare(void)
50{ 51{
51 set_kernel_text_rw(); 52 set_kernel_text_rw();
53 set_all_modules_text_rw();
52 modifying_code = 1; 54 modifying_code = 1;
53 return 0; 55 return 0;
54} 56}
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
56int ftrace_arch_code_modify_post_process(void) 58int ftrace_arch_code_modify_post_process(void)
57{ 59{
58 modifying_code = 0; 60 modifying_code = 0;
61 set_all_modules_text_ro();
59 set_kernel_text_ro(); 62 set_kernel_text_ro();
60 return 0; 63 return 0;
61} 64}
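The ftrace hunks extend the rw/ro bracketing around code patching to module text as well as core kernel text. The shape of that pattern, as a user-space sketch with mprotect() standing in for the set_*_text_rw()/_ro() helpers (error handling omitted; assumes the page was executable to begin with):

    #include <stdint.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static void patch_text(void *addr, const void *insn, size_t len)
    {
        long ps = sysconf(_SC_PAGESIZE);
        void *page = (void *)((uintptr_t)addr & ~(uintptr_t)(ps - 1));

        mprotect(page, ps, PROT_READ | PROT_WRITE);   /* ..._text_rw() */
        memcpy(addr, insn, len);                      /* the patch     */
        mprotect(page, ps, PROT_READ | PROT_EXEC);    /* ..._text_ro() */
    }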
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 763310165fa0..7f138b3c3c52 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -61,6 +61,9 @@ void __init i386_start_kernel(void)
61 case X86_SUBARCH_MRST: 61 case X86_SUBARCH_MRST:
62 x86_mrst_early_setup(); 62 x86_mrst_early_setup();
63 break; 63 break;
64 case X86_SUBARCH_CE4100:
65 x86_ce4100_early_setup();
66 break;
64 default: 67 default:
65 i386_default_early_setup(); 68 i386_default_early_setup();
66 break; 69 break;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c0dbd9ac24f0..5707fc8a7a4b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -316,6 +316,10 @@ ENTRY(startup_32_smp)
316 subl $0x80000001, %eax 316 subl $0x80000001, %eax
317 cmpl $(0x8000ffff-0x80000001), %eax 317 cmpl $(0x8000ffff-0x80000001), %eax
318 ja 6f 318 ja 6f
319
320 /* Clear bogus XD_DISABLE bits */
321 call verify_cpu
322
319 mov $0x80000001, %eax 323 mov $0x80000001, %eax
320 cpuid 324 cpuid
321 /* Execute Disable bit supported? */ 325 /* Execute Disable bit supported? */
@@ -611,6 +615,8 @@ ignore_int:
611#endif 615#endif
612 iret 616 iret
613 617
618#include "verify_cpu.S"
619
614 __REFDATA 620 __REFDATA
615.align 4 621.align 4
616ENTRY(initial_code) 622ENTRY(initial_code)
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c0df99..5940282bd2f9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 1186
1187 /* This is possible if op is under delayed unoptimizing */
1188 if (kprobe_disabled(&op->kp))
1189 return;
1190
1187 preempt_disable(); 1191 preempt_disable();
1188 if (kprobe_running()) { 1192 if (kprobe_running()) {
1189 kprobes_inc_nmissed_count(&op->kp); 1193 kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1401 return 0; 1405 return 0;
1402} 1406}
1403 1407
1404/* Replace a breakpoint (int3) with a relative jump. */ 1408#define MAX_OPTIMIZE_PROBES 256
1405int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) 1409static struct text_poke_param *jump_poke_params;
1410static struct jump_poke_buffer {
1411 u8 buf[RELATIVEJUMP_SIZE];
1412} *jump_poke_bufs;
1413
1414static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1415 u8 *insn_buf,
1416 struct optimized_kprobe *op)
1406{ 1417{
1407 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1408 s32 rel = (s32)((long)op->optinsn.insn - 1418 s32 rel = (s32)((long)op->optinsn.insn -
1409 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 1419 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1410 1420
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1412 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 1422 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1413 RELATIVE_ADDR_SIZE); 1423 RELATIVE_ADDR_SIZE);
1414 1424
1415 jmp_code[0] = RELATIVEJUMP_OPCODE; 1425 insn_buf[0] = RELATIVEJUMP_OPCODE;
1416 *(s32 *)(&jmp_code[1]) = rel; 1426 *(s32 *)(&insn_buf[1]) = rel;
1427
1428 tprm->addr = op->kp.addr;
1429 tprm->opcode = insn_buf;
1430 tprm->len = RELATIVEJUMP_SIZE;
1431}
1432
1433/*
1434 * Replace breakpoints (int3) with relative jumps.
1435 * Caller must call with locking kprobe_mutex and text_mutex.
1436 */
1437void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1438{
1439 struct optimized_kprobe *op, *tmp;
1440 int c = 0;
1441
1442 list_for_each_entry_safe(op, tmp, oplist, list) {
1443 WARN_ON(kprobe_disabled(&op->kp));
1444 /* Setup param */
1445 setup_optimize_kprobe(&jump_poke_params[c],
1446 jump_poke_bufs[c].buf, op);
1447 list_del_init(&op->list);
1448 if (++c >= MAX_OPTIMIZE_PROBES)
1449 break;
1450 }
1417 1451
1418 /* 1452 /*
1419 * text_poke_smp doesn't support NMI/MCE code modifying. 1453 * text_poke_smp doesn't support NMI/MCE code modifying.
1420 * However, since kprobes itself also doesn't support NMI/MCE 1454 * However, since kprobes itself also doesn't support NMI/MCE
1421 * code probing, it's not a problem. 1455 * code probing, it's not a problem.
1422 */ 1456 */
1423 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); 1457 text_poke_smp_batch(jump_poke_params, c);
1424 return 0; 1458}
1459
1460static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1461 u8 *insn_buf,
1462 struct optimized_kprobe *op)
1463{
1464 /* Set int3 to first byte for kprobes */
1465 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1466 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1467
1468 tprm->addr = op->kp.addr;
1469 tprm->opcode = insn_buf;
1470 tprm->len = RELATIVEJUMP_SIZE;
1471}
1472
1473/*
1474 * Recover original instructions and breakpoints from relative jumps.
1475 * Caller must call with locking kprobe_mutex.
1476 */
1477extern void arch_unoptimize_kprobes(struct list_head *oplist,
1478 struct list_head *done_list)
1479{
1480 struct optimized_kprobe *op, *tmp;
1481 int c = 0;
1482
1483 list_for_each_entry_safe(op, tmp, oplist, list) {
1484 /* Setup param */
1485 setup_unoptimize_kprobe(&jump_poke_params[c],
1486 jump_poke_bufs[c].buf, op);
1487 list_move(&op->list, done_list);
1488 if (++c >= MAX_OPTIMIZE_PROBES)
1489 break;
1490 }
1491
1492 /*
1493 * text_poke_smp doesn't support NMI/MCE code modifying.
1494 * However, since kprobes itself also doesn't support NMI/MCE
1495 * code probing, it's not a problem.
1496 */
1497 text_poke_smp_batch(jump_poke_params, c);
1425} 1498}
1426 1499
1427/* Replace a relative jump with a breakpoint (int3). */ 1500/* Replace a relative jump with a breakpoint (int3). */
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
1453 } 1526 }
1454 return 0; 1527 return 0;
1455} 1528}
1529
1530static int __kprobes init_poke_params(void)
1531{
1532 /* Allocate code buffer and parameter array */
1533 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1534 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1535 if (!jump_poke_bufs)
1536 return -ENOMEM;
1537
1538 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1539 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1540 if (!jump_poke_params) {
1541 kfree(jump_poke_bufs);
1542 jump_poke_bufs = NULL;
1543 return -ENOMEM;
1544 }
1545
1546 return 0;
1547}
1548#else /* !CONFIG_OPTPROBES */
1549static int __kprobes init_poke_params(void)
1550{
1551 return 0;
1552}
1456#endif 1553#endif
1457 1554
1458int __init arch_init_kprobes(void) 1555int __init arch_init_kprobes(void)
1459{ 1556{
1460 return 0; 1557 return init_poke_params();
1461} 1558}
1462 1559
1463int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1560int __kprobes arch_trampoline_kprobe(struct kprobe *p)
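The kprobes rework above trades one text_poke_smp() per probe, each paying a full CPU-synchronization round, for batches of up to MAX_OPTIMIZE_PROBES handed to text_poke_smp_batch() in one go. The batching shape, reduced to a self-contained sketch in which flush_batch() stands in for the single synchronized poke:

    #include <stddef.h>
    #include <string.h>

    #define MAX_BATCH 256

    struct poke { void *addr; const void *opcode; size_t len; };

    /* Stand-in for text_poke_smp_batch(): one CPU sync covers n pokes. */
    static void flush_batch(struct poke *p, int n)
    {
        for (int i = 0; i < n; i++)
            memcpy(p[i].addr, p[i].opcode, p[i].len);
    }

    static void optimize_all(struct poke *pending, int npending)
    {
        static struct poke batch[MAX_BATCH];
        int c = 0;

        for (int i = 0; i < npending; i++) {
            batch[c++] = pending[i];
            if (c == MAX_BATCH) {         /* flush a full batch, go on */
                flush_batch(batch, c);
                c = 0;
            }
        }
        if (c)
            flush_batch(batch, c);        /* remainder */
    }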
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index ce0cb4721c9a..0fe6d1a66c38 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu)
155 return 0; 155 return 0;
156} 156}
157 157
158static int get_ucode_data(void *to, const u8 *from, size_t n)
159{
160 memcpy(to, from, n);
161 return 0;
162}
163
164static void * 158static void *
165get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) 159get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
166{ 160{
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
168 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 162 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
169 void *mc; 163 void *mc;
170 164
171 if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) 165 get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
172 return NULL;
173 166
174 if (section_hdr[0] != UCODE_UCODE_TYPE) { 167 if (section_hdr[0] != UCODE_UCODE_TYPE) {
175 pr_err("error: invalid type field in container file section header\n"); 168 pr_err("error: invalid type field in container file section header\n");
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
183 return NULL; 176 return NULL;
184 } 177 }
185 178
186 mc = vmalloc(UCODE_MAX_SIZE); 179 mc = vzalloc(UCODE_MAX_SIZE);
187 if (mc) { 180 if (!mc)
188 memset(mc, 0, UCODE_MAX_SIZE); 181 return NULL;
189 if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, 182
190 total_size)) { 183 get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
191 vfree(mc); 184 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
192 mc = NULL; 185
193 } else
194 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
195 }
196 return mc; 186 return mc;
197} 187}
198 188
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf)
202 unsigned int *buf_pos = (unsigned int *)container_hdr; 192 unsigned int *buf_pos = (unsigned int *)container_hdr;
203 unsigned long size; 193 unsigned long size;
204 194
205 if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) 195 get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
206 return 0;
207 196
208 size = buf_pos[2]; 197 size = buf_pos[2];
209 198
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf)
219 } 208 }
220 209
221 buf += UCODE_CONTAINER_HEADER_SIZE; 210 buf += UCODE_CONTAINER_HEADER_SIZE;
222 if (get_ucode_data(equiv_cpu_table, buf, size)) { 211 get_ucode_data(equiv_cpu_table, buf, size);
223 vfree(equiv_cpu_table);
224 return 0;
225 }
226 212
227 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ 213 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
228} 214}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index dcb65cc0a053..1a1b606d3e92 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -364,8 +364,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
364 364
365 /* For performance reasons, reuse mc area when possible */ 365 /* For performance reasons, reuse mc area when possible */
366 if (!mc || mc_size > curr_mc_size) { 366 if (!mc || mc_size > curr_mc_size) {
367 if (mc) 367 vfree(mc);
368 vfree(mc);
369 mc = vmalloc(mc_size); 368 mc = vmalloc(mc_size);
370 if (!mc) 369 if (!mc)
371 break; 370 break;
@@ -374,13 +373,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
374 373
375 if (get_ucode_data(mc, ucode_ptr, mc_size) || 374 if (get_ucode_data(mc, ucode_ptr, mc_size) ||
376 microcode_sanity_check(mc) < 0) { 375 microcode_sanity_check(mc) < 0) {
377 vfree(mc);
378 break; 376 break;
379 } 377 }
380 378
381 if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { 379 if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
382 if (new_mc) 380 vfree(new_mc);
383 vfree(new_mc);
384 new_rev = mc_header.rev; 381 new_rev = mc_header.rev;
385 new_mc = mc; 382 new_mc = mc;
386 mc = NULL; /* trigger new vmalloc */ 383 mc = NULL; /* trigger new vmalloc */
@@ -390,12 +387,10 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
390 leftover -= mc_size; 387 leftover -= mc_size;
391 } 388 }
392 389
393 if (mc) 390 vfree(mc);
394 vfree(mc);
395 391
396 if (leftover) { 392 if (leftover) {
397 if (new_mc) 393 vfree(new_mc);
398 vfree(new_mc);
399 state = UCODE_ERROR; 394 state = UCODE_ERROR;
400 goto out; 395 goto out;
401 } 396 }
@@ -405,8 +400,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
405 goto out; 400 goto out;
406 } 401 }
407 402
408 if (uci->mc) 403 vfree(uci->mc);
409 vfree(uci->mc);
410 uci->mc = (struct microcode_intel *)new_mc; 404 uci->mc = (struct microcode_intel *)new_mc;
411 405
412 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", 406 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
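Both microcode hunks above lean on two long-standing conventions: vzalloc() bundles vmalloc() plus the memset-to-zero, and vfree(NULL) is a no-op, so every "if (p) vfree(p)" guard can be dropped. The user-space equivalents behave the same way, since calloc() allocates zeroed memory and free(NULL) is defined to do nothing:

    #include <stdlib.h>

    static void *load_section(size_t size)
    {
        void *mc = calloc(1, size);   /* was: malloc + memset(mc, 0, size) */
        if (!mc)
            return NULL;
        /* ... fill mc ... */
        return mc;
    }

    int main(void)
    {
        void *mc = load_section(4096);
        free(mc);                     /* fine even when mc == NULL */
        return 0;
    }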
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba0f0ca9f280..c01ffa5b9b87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -143,7 +143,7 @@ static void flush_gart(void)
143 143
144 spin_lock_irqsave(&iommu_bitmap_lock, flags); 144 spin_lock_irqsave(&iommu_bitmap_lock, flags);
145 if (need_flush) { 145 if (need_flush) {
146 k8_flush_garts(); 146 amd_flush_garts();
147 need_flush = false; 147 need_flush = false;
148 } 148 }
149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
561{ 561{
562 int i; 562 int i;
563 563
564 if (!k8_northbridges.gart_supported) 564 if (!amd_nb_has_feature(AMD_NB_GART))
565 return; 565 return;
566 566
567 for (i = 0; i < k8_northbridges.num; i++) { 567 for (i = 0; i < amd_nb_num(); i++) {
568 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 568 struct pci_dev *dev = node_to_amd_nb(i)->misc;
569 569
570 enable_gart_translation(dev, __pa(agp_gatt_table)); 570 enable_gart_translation(dev, __pa(agp_gatt_table));
571 } 571 }
572 572
573 /* Flush the GART-TLB to remove stale entries */ 573 /* Flush the GART-TLB to remove stale entries */
574 k8_flush_garts(); 574 amd_flush_garts();
575} 575}
576 576
577/* 577/*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
596 if (!fix_up_north_bridges) 596 if (!fix_up_north_bridges)
597 return; 597 return;
598 598
599 if (!k8_northbridges.gart_supported) 599 if (!amd_nb_has_feature(AMD_NB_GART))
600 return; 600 return;
601 601
602 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 602 pr_info("PCI-DMA: Restoring GART aperture settings\n");
603 603
604 for (i = 0; i < k8_northbridges.num; i++) { 604 for (i = 0; i < amd_nb_num(); i++) {
605 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 605 struct pci_dev *dev = node_to_amd_nb(i)->misc;
606 606
607 /* 607 /*
608 * Don't enable translations just yet. That is the next 608 * Don't enable translations just yet. That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
644 * Private Northbridge GATT initialization in case we cannot use the 644 * Private Northbridge GATT initialization in case we cannot use the
645 * AGP driver for some reason. 645 * AGP driver for some reason.
646 */ 646 */
647static __init int init_k8_gatt(struct agp_kern_info *info) 647static __init int init_amd_gatt(struct agp_kern_info *info)
648{ 648{
649 unsigned aper_size, gatt_size, new_aper_size; 649 unsigned aper_size, gatt_size, new_aper_size;
650 unsigned aper_base, new_aper_base; 650 unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
656 656
657 aper_size = aper_base = info->aper_size = 0; 657 aper_size = aper_base = info->aper_size = 0;
658 dev = NULL; 658 dev = NULL;
659 for (i = 0; i < k8_northbridges.num; i++) { 659 for (i = 0; i < amd_nb_num(); i++) {
660 dev = k8_northbridges.nb_misc[i]; 660 dev = node_to_amd_nb(i)->misc;
661 new_aper_base = read_aperture(dev, &new_aper_size); 661 new_aper_base = read_aperture(dev, &new_aper_size);
662 if (!new_aper_base) 662 if (!new_aper_base)
663 goto nommu; 663 goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
725 if (!no_agp) 725 if (!no_agp)
726 return; 726 return;
727 727
728 if (!k8_northbridges.gart_supported) 728 if (!amd_nb_has_feature(AMD_NB_GART))
729 return; 729 return;
730 730
731 for (i = 0; i < k8_northbridges.num; i++) { 731 for (i = 0; i < amd_nb_num(); i++) {
732 u32 ctl; 732 u32 ctl;
733 733
734 dev = k8_northbridges.nb_misc[i]; 734 dev = node_to_amd_nb(i)->misc;
735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
736 736
737 ctl &= ~GARTEN; 737 ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
749 unsigned long scratch; 749 unsigned long scratch;
750 long i; 750 long i;
751 751
752 if (!k8_northbridges.gart_supported) 752 if (!amd_nb_has_feature(AMD_NB_GART))
753 return 0; 753 return 0;
754 754
755#ifndef CONFIG_AGP_AMD64 755#ifndef CONFIG_AGP_AMD64
756 no_agp = 1; 756 no_agp = 1;
757#else 757#else
758 /* Makefile puts PCI initialization via subsys_initcall first. */ 758 /* Makefile puts PCI initialization via subsys_initcall first. */
759 /* Add other K8 AGP bridge drivers here */ 759 /* Add other AMD AGP bridge drivers here */
760 no_agp = no_agp || 760 no_agp = no_agp ||
761 (agp_amd64_init() < 0) || 761 (agp_amd64_init() < 0) ||
762 (agp_copy_info(agp_bridge, &info) < 0); 762 (agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
765 if (no_iommu || 765 if (no_iommu ||
766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) || 766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
767 !gart_iommu_aperture || 767 !gart_iommu_aperture ||
768 (no_agp && init_k8_gatt(&info) < 0)) { 768 (no_agp && init_amd_gatt(&info) < 0)) {
769 if (max_pfn > MAX_DMA32_PFN) { 769 if (max_pfn > MAX_DMA32_PFN) {
770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); 770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
771 pr_warning("falling back to iommu=soft.\n"); 771 pr_warning("falling back to iommu=soft.\n");
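The GART hunks above are mostly mechanical renames from the old k8_northbridges global to accessor-style helpers: amd_nb_num() for the count, node_to_amd_nb(i)->misc for the per-node PCI device, and amd_nb_has_feature(AMD_NB_GART) for the capability test. A compile-only mock of that accessor shape (the struct layout and flag value here are assumptions, inferred only from the calls visible in the hunks):

    #include <stdbool.h>

    struct amd_northbridge { void *misc; };   /* assumed minimal layout */

    #define AMD_NB_GART 0x1                   /* illustrative flag bit */

    static struct amd_northbridge nb[2];
    static unsigned nb_flags = AMD_NB_GART;

    static int amd_nb_num(void) { return 2; }
    static struct amd_northbridge *node_to_amd_nb(int i) { return &nb[i]; }
    static bool amd_nb_has_feature(unsigned f) { return nb_flags & f; }

    static void for_each_gart(void (*fn)(void *misc))
    {
        if (!amd_nb_has_feature(AMD_NB_GART))
            return;
        for (int i = 0; i < amd_nb_num(); i++)
            fn(node_to_amd_nb(i)->misc);      /* as the hunks iterate */
    }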
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868a86aa..c852041bfc3d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -91,8 +91,7 @@ void exit_thread(void)
91void show_regs(struct pt_regs *regs) 91void show_regs(struct pt_regs *regs)
92{ 92{
93 show_registers(regs); 93 show_registers(regs);
94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
95 regs->bp);
96} 95}
97 96
98void show_regs_common(void) 97void show_regs_common(void)
@@ -374,6 +373,7 @@ void default_idle(void)
374{ 373{
375 if (hlt_use_halt()) { 374 if (hlt_use_halt()) {
376 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 375 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
376 trace_cpu_idle(1, smp_processor_id());
377 current_thread_info()->status &= ~TS_POLLING; 377 current_thread_info()->status &= ~TS_POLLING;
378 /* 378 /*
379 * TS_POLLING-cleared state must be visible before we 379 * TS_POLLING-cleared state must be visible before we
@@ -444,6 +444,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
444void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 444void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
445{ 445{
446 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); 446 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
447 trace_cpu_idle((ax>>4)+1, smp_processor_id());
447 if (!need_resched()) { 448 if (!need_resched()) {
448 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 449 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
449 clflush((void *)&current_thread_info()->flags); 450 clflush((void *)&current_thread_info()->flags);
@@ -460,6 +461,7 @@ static void mwait_idle(void)
460{ 461{
461 if (!need_resched()) { 462 if (!need_resched()) {
462 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 463 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
464 trace_cpu_idle(1, smp_processor_id());
463 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 465 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
464 clflush((void *)&current_thread_info()->flags); 466 clflush((void *)&current_thread_info()->flags);
465 467
@@ -481,10 +483,12 @@ static void mwait_idle(void)
481static void poll_idle(void) 483static void poll_idle(void)
482{ 484{
483 trace_power_start(POWER_CSTATE, 0, smp_processor_id()); 485 trace_power_start(POWER_CSTATE, 0, smp_processor_id());
486 trace_cpu_idle(0, smp_processor_id());
484 local_irq_enable(); 487 local_irq_enable();
485 while (!need_resched()) 488 while (!need_resched())
486 cpu_relax(); 489 cpu_relax();
487 trace_power_end(0); 490 trace_power_end(smp_processor_id());
491 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
488} 492}
489 493
490/* 494/*
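[Editor's note] Every idle entry point in this hunk gains a trace_cpu_idle() pair alongside the older trace_power_start()/trace_power_end() calls, with PWR_EVENT_EXIT marking the exit. A standalone sketch of the pairing (tracepoint replaced by printf; the PWR_EVENT_EXIT value of -1 is taken from the power tracing header of this era):

    #include <stdio.h>

    #define PWR_EVENT_EXIT -1   /* "leaving idle" marker */

    static void trace_cpu_idle(int state, int cpu)
    {
        printf("cpu_idle: state=%d cpu_id=%d\n", state, cpu);  /* tracepoint stand-in */
    }

    static void poll_idle(int cpu)
    {
        int need_resched = 0;

        trace_cpu_idle(0, cpu);                 /* entering C0 polling */
        while (!need_resched)
            need_resched = 1;                   /* stands in for the cpu_relax() loop */
        trace_cpu_idle(PWR_EVENT_EXIT, cpu);    /* leaving idle */
    }

    int main(void) { poll_idle(0); return 0; }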
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 96586c3cbbbf..4b9befa0e347 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -113,8 +113,8 @@ void cpu_idle(void)
113 stop_critical_timings(); 113 stop_critical_timings();
114 pm_idle(); 114 pm_idle();
115 start_critical_timings(); 115 start_critical_timings();
116
117 trace_power_end(smp_processor_id()); 116 trace_power_end(smp_processor_id());
117 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
118 } 118 }
119 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
120 preempt_enable_no_resched(); 120 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3d7a3a04f38..4c818a738396 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -142,6 +142,8 @@ void cpu_idle(void)
142 start_critical_timings(); 142 start_critical_timings();
143 143
144 trace_power_end(smp_processor_id()); 144 trace_power_end(smp_processor_id());
145 trace_cpu_idle(PWR_EVENT_EXIT,
146 smp_processor_id());
145 147
146 /* In many cases the interrupt that ended idle 148 /* In many cases the interrupt that ended idle
147 has already called exit_idle. But some idle 149 has already called exit_idle. But some idle
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index fda313ebbb03..c8e41e90f59c 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev)
43 outb(1, 0x92); 43 outb(1, 0x92);
44} 44}
45 45
46static void ce4100_reset(struct pci_dev *dev)
47{
48 int i;
49
50 for (i = 0; i < 10; i++) {
51 outb(0x2, 0xcf9);
52 udelay(50);
53 }
54}
55
46struct device_fixup { 56struct device_fixup {
47 unsigned int vendor; 57 unsigned int vendor;
48 unsigned int device; 58 unsigned int device;
49 void (*reboot_fixup)(struct pci_dev *); 59 void (*reboot_fixup)(struct pci_dev *);
50}; 60};
51 61
62/*
63 * PCI ids solely used for fixups_table go here
64 */
65#define PCI_DEVICE_ID_INTEL_CE4100 0x0708
66
52static const struct device_fixup fixups_table[] = { 67static const struct device_fixup fixups_table[] = {
53{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, 68{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
54{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, 69{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
55{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, 70{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
56{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, 71{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
72{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset },
57}; 73};
58 74
59/* 75/*
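[Editor's note] ce4100_reset() joins a vendor/device -> callback table; the fixup path walks this table and invokes the first matching handler. A self-contained sketch of that dispatch pattern (the vendor/device pair comes from the hunk above; the probe result in main() is invented):

    #include <stdio.h>

    struct device_fixup {
        unsigned int vendor, device;
        void (*reboot_fixup)(void);
    };

    static void ce4100_reset(void) { puts("pulse the 0xcf9 reset line"); }

    static const struct device_fixup fixups_table[] = {
        { 0x8086, 0x0708, ce4100_reset },   /* PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100 */
    };

    int main(void)
    {
        unsigned int vendor = 0x8086, device = 0x0708;  /* pretend this came from a PCI probe */
        unsigned int i;

        for (i = 0; i < sizeof(fixups_table) / sizeof(fixups_table[0]); i++)
            if (fixups_table[i].vendor == vendor && fixups_table[i].device == device)
                fixups_table[i].reboot_fixup();
        return 0;
    }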
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 85268f8eadf6..d3cfe26c0252 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -501,7 +501,18 @@ static inline unsigned long long get_total_mem(void)
501 return total << PAGE_SHIFT; 501 return total << PAGE_SHIFT;
502} 502}
503 503
504#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF 504/*
 505 * Keep the crash kernel below this limit. On 32-bit, earlier kernels
 506 * limited it to the low 512 MiB due to mapping restrictions.
 507 * On 64-bit, kexec-tools currently limits us to 896 MiB; raise this
 508 * limit once kexec-tools is fixed.
509 */
510#ifdef CONFIG_X86_32
511# define CRASH_KERNEL_ADDR_MAX (512 << 20)
512#else
513# define CRASH_KERNEL_ADDR_MAX (896 << 20)
514#endif
515
505static void __init reserve_crashkernel(void) 516static void __init reserve_crashkernel(void)
506{ 517{
507 unsigned long long total_mem; 518 unsigned long long total_mem;
@@ -520,10 +531,10 @@ static void __init reserve_crashkernel(void)
520 const unsigned long long alignment = 16<<20; /* 16M */ 531 const unsigned long long alignment = 16<<20; /* 16M */
521 532
522 /* 533 /*
 523 * kexec wants the bzImage below DEFAULT_BZIMAGE_ADDR_MAX 534 * kexec wants the bzImage below CRASH_KERNEL_ADDR_MAX
524 */ 535 */
525 crash_base = memblock_find_in_range(alignment, 536 crash_base = memblock_find_in_range(alignment,
526 DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment); 537 CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
527 538
528 if (crash_base == MEMBLOCK_ERROR) { 539 if (crash_base == MEMBLOCK_ERROR) {
529 pr_info("crashkernel reservation failed - No suitable area found.\n"); 540 pr_info("crashkernel reservation failed - No suitable area found.\n");
@@ -694,7 +705,7 @@ static u64 __init get_max_mapped(void)
694void __init setup_arch(char **cmdline_p) 705void __init setup_arch(char **cmdline_p)
695{ 706{
696 int acpi = 0; 707 int acpi = 0;
697 int k8 = 0; 708 int amd = 0;
698 unsigned long flags; 709 unsigned long flags;
699 710
700#ifdef CONFIG_X86_32 711#ifdef CONFIG_X86_32
@@ -980,12 +991,12 @@ void __init setup_arch(char **cmdline_p)
980 acpi = acpi_numa_init(); 991 acpi = acpi_numa_init();
981#endif 992#endif
982 993
983#ifdef CONFIG_K8_NUMA 994#ifdef CONFIG_AMD_NUMA
984 if (!acpi) 995 if (!acpi)
985 k8 = !k8_numa_init(0, max_pfn); 996 amd = !amd_numa_init(0, max_pfn);
986#endif 997#endif
987 998
988 initmem_init(0, max_pfn, acpi, k8); 999 initmem_init(0, max_pfn, acpi, amd);
989 memblock_find_dma_reserve(); 1000 memblock_find_dma_reserve();
990 dma32_reserve_bootmem(); 1001 dma32_reserve_bootmem();
991 1002
@@ -1034,10 +1045,7 @@ void __init setup_arch(char **cmdline_p)
1034#endif 1045#endif
1035 1046
1036 init_apic_mappings(); 1047 init_apic_mappings();
1037 ioapic_init_mappings(); 1048 ioapic_and_gsi_init();
1038
1039 /* need to wait for io_apic is mapped */
1040 probe_nr_irqs_gsi();
1041 1049
1042 kvm_guest_init(); 1050 kvm_guest_init();
1043 1051
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d1b7df..68f61ac632e1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
281 */ 281 */
282 smp_store_cpu_info(cpuid); 282 smp_store_cpu_info(cpuid);
283 283
284 /*
285 * This must be done before setting cpu_online_mask
286 * or calling notify_cpu_starting.
287 */
288 set_cpu_sibling_map(raw_smp_processor_id());
289 wmb();
290
284 notify_cpu_starting(cpuid); 291 notify_cpu_starting(cpuid);
285 292
286 /* 293 /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
316 */ 323 */
317 check_tsc_sync_target(); 324 check_tsc_sync_target();
318 325
319 if (nmi_watchdog == NMI_IO_APIC) {
320 legacy_pic->mask(0);
321 enable_NMI_through_LVT0();
322 legacy_pic->unmask(0);
323 }
324
325 /* This must be done before setting cpu_online_mask */
326 set_cpu_sibling_map(raw_smp_processor_id());
327 wmb();
328
329 /* 326 /*
330 * We need to hold call_lock, so there is no inconsistency 327 * We need to hold call_lock, so there is no inconsistency
331 * between the time smp_call_function() determines number of 328 * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
1061 printk(KERN_INFO "SMP mode deactivated.\n"); 1058 printk(KERN_INFO "SMP mode deactivated.\n");
1062 smpboot_clear_io_apic(); 1059 smpboot_clear_io_apic();
1063 1060
1064 localise_nmi_watchdog();
1065
1066 connect_bsp_APIC(); 1061 connect_bsp_APIC();
1067 setup_local_APIC(); 1062 setup_local_APIC();
1068 end_local_APIC_setup(); 1063 end_local_APIC_setup();
@@ -1196,7 +1191,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1196#ifdef CONFIG_X86_IO_APIC 1191#ifdef CONFIG_X86_IO_APIC
1197 setup_ioapic_dest(); 1192 setup_ioapic_dest();
1198#endif 1193#endif
1199 check_nmi_watchdog();
1200 mtrr_aps_init(); 1194 mtrr_aps_init();
1201} 1195}
1202 1196
@@ -1341,8 +1335,6 @@ int native_cpu_disable(void)
1341 if (cpu == 0) 1335 if (cpu == 0)
1342 return -EBUSY; 1336 return -EBUSY;
1343 1337
1344 if (nmi_watchdog == NMI_LOCAL_APIC)
1345 stop_apic_nmi_watchdog(NULL);
1346 clear_local_APIC(); 1338 clear_local_APIC();
1347 1339
1348 cpu_disable_common(); 1340 cpu_disable_common();
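[Editor's note] The sibling-map move at the top of this file is a publication-ordering fix: the topology data must be complete before the CPU becomes visible via cpu_online_mask or notify_cpu_starting() callbacks run, which is what the wmb() enforces. A userspace analogue of that publish pattern, with C11 atomics standing in for the kernel barrier (a sketch, not the kernel mechanism itself):

    #include <stdatomic.h>
    #include <stdio.h>

    static int sibling_map_ready;       /* stands in for the CPU sibling map */
    static atomic_int cpu_online;       /* stands in for the cpu_online_mask bit */

    int main(void)
    {
        sibling_map_ready = 1;                          /* build the topology first */
        atomic_thread_fence(memory_order_release);      /* plays the role of wmb() */
        atomic_store(&cpu_online, 1);                   /* only now publish the CPU */

        printf("online=%d, map ready=%d\n", atomic_load(&cpu_online), sibling_map_ready);
        return 0;
    }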
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525368a7..938c8e10a19a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
73 */ 73 */
74void save_stack_trace(struct stack_trace *trace) 74void save_stack_trace(struct stack_trace *trace)
75{ 75{
76 dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); 76 dump_trace(current, NULL, NULL, &save_stack_ops, trace);
77 if (trace->nr_entries < trace->max_entries) 77 if (trace->nr_entries < trace->max_entries)
78 trace->entries[trace->nr_entries++] = ULONG_MAX; 78 trace->entries[trace->nr_entries++] = ULONG_MAX;
79} 79}
80EXPORT_SYMBOL_GPL(save_stack_trace); 80EXPORT_SYMBOL_GPL(save_stack_trace);
81 81
82void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) 82void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
83{ 83{
84 dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); 84 dump_trace(current, regs, NULL, &save_stack_ops, trace);
85 if (trace->nr_entries < trace->max_entries) 85 if (trace->nr_entries < trace->max_entries)
86 trace->entries[trace->nr_entries++] = ULONG_MAX; 86 trace->entries[trace->nr_entries++] = ULONG_MAX;
87} 87}
88 88
89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
90{ 90{
91 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 91 dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
92 if (trace->nr_entries < trace->max_entries) 92 if (trace->nr_entries < trace->max_entries)
93 trace->entries[trace->nr_entries++] = ULONG_MAX; 93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94} 94}
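[Editor's note] The API change here tracks the dump_trace() signature change seen elsewhere in this commit: the explicit bp argument is gone, and callers that used to pass a frame pointer now hand over the whole pt_regs. A stubbed sketch of the new wrapper shape (types reduced to the bare minimum, not the real kernel structures):

    #include <stdio.h>

    struct pt_regs { unsigned long ip, bp; };
    struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; };

    /* dump_trace() stand-in: the walker now derives its start from regs, not a bare bp */
    static void dump_trace(struct pt_regs *regs, struct stack_trace *trace)
    {
        if (trace->nr_entries < trace->max_entries)
            trace->entries[trace->nr_entries++] = regs ? regs->ip : 0;
    }

    static void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
    {
        dump_trace(regs, trace);
        if (trace->nr_entries < trace->max_entries)
            trace->entries[trace->nr_entries++] = (unsigned long)-1; /* ULONG_MAX end marker */
    }

    int main(void)
    {
        unsigned long buf[4];
        struct stack_trace t = { 0, 4, buf };
        struct pt_regs r = { 0xc0ffee, 0 };

        save_stack_trace_regs(&t, &r);
        printf("%u entries, first %#lx\n", t.nr_entries, t.entries[0]);
        return 0;
    }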
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e14cfa..25a28a245937 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
22#include <asm/hpet.h> 22#include <asm/hpet.h>
23#include <asm/time.h> 23#include <asm/time.h>
24 24
25#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
26int timer_ack;
27#endif
28
29#ifdef CONFIG_X86_64 25#ifdef CONFIG_X86_64
30volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; 26volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
31#endif 27#endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
63 /* Keep nmi watchdog up to date */ 59 /* Keep nmi watchdog up to date */
64 inc_irq_stat(irq0_irqs); 60 inc_irq_stat(irq0_irqs);
65 61
66 /* Optimized out for !IO_APIC and x86_64 */
67 if (timer_ack) {
68 /*
69 * Subtle, when I/O APICs are used we have to ack timer IRQ
70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system.
72 */
73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL);
77 raw_spin_unlock(&i8259A_lock);
78 }
79
80 global_clock_event->event_handler(global_clock_event); 62 global_clock_event->event_handler(global_clock_event);
81 63
82 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 64 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 3af2dff58b21..075d130efcf9 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -127,7 +127,7 @@ startup_64:
127no_longmode: 127no_longmode:
128 hlt 128 hlt
129 jmp no_longmode 129 jmp no_longmode
130#include "verify_cpu_64.S" 130#include "verify_cpu.S"
131 131
132 # Careful these need to be in the same 64K segment as the above; 132 # Careful these need to be in the same 64K segment as the above;
133tidt: 133tidt:
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca42c96..c76aaca5694d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
83 83
84static int ignore_nmis; 84static int ignore_nmis;
85 85
86int unknown_nmi_panic;
87
86static inline void conditional_sti(struct pt_regs *regs) 88static inline void conditional_sti(struct pt_regs *regs)
87{ 89{
88 if (regs->flags & X86_EFLAGS_IF) 90 if (regs->flags & X86_EFLAGS_IF)
@@ -300,6 +302,13 @@ gp_in_kernel:
300 die("general protection fault", regs, error_code); 302 die("general protection fault", regs, error_code);
301} 303}
302 304
305static int __init setup_unknown_nmi_panic(char *str)
306{
307 unknown_nmi_panic = 1;
308 return 1;
309}
310__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
311
303static notrace __kprobes void 312static notrace __kprobes void
304mem_parity_error(unsigned char reason, struct pt_regs *regs) 313mem_parity_error(unsigned char reason, struct pt_regs *regs)
305{ 314{
@@ -342,9 +351,11 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
342 reason = (reason & 0xf) | 8; 351 reason = (reason & 0xf) | 8;
343 outb(reason, 0x61); 352 outb(reason, 0x61);
344 353
345 i = 2000; 354 i = 20000;
346 while (--i) 355 while (--i) {
347 udelay(1000); 356 touch_nmi_watchdog();
357 udelay(100);
358 }
348 359
349 reason &= ~8; 360 reason &= ~8;
350 outb(reason, 0x61); 361 outb(reason, 0x61);
@@ -371,7 +382,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
371 reason, smp_processor_id()); 382 reason, smp_processor_id());
372 383
373 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); 384 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
374 if (panic_on_unrecovered_nmi) 385 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
375 panic("NMI: Not continuing"); 386 panic("NMI: Not continuing");
376 387
377 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 388 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -397,20 +408,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
397 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 408 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
398 == NOTIFY_STOP) 409 == NOTIFY_STOP)
399 return; 410 return;
400
401#ifndef CONFIG_LOCKUP_DETECTOR
402 /*
403 * Ok, so this is none of the documented NMI sources,
404 * so it must be the NMI watchdog.
405 */
406 if (nmi_watchdog_tick(regs, reason))
407 return;
408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
410 unknown_nmi_error(reason, regs);
411#else
412 unknown_nmi_error(reason, regs);
413#endif 411#endif
412 unknown_nmi_error(reason, regs);
414 413
415 return; 414 return;
416 } 415 }
@@ -446,14 +445,12 @@ do_nmi(struct pt_regs *regs, long error_code)
446 445
447void stop_nmi(void) 446void stop_nmi(void)
448{ 447{
449 acpi_nmi_disable();
450 ignore_nmis++; 448 ignore_nmis++;
451} 449}
452 450
453void restart_nmi(void) 451void restart_nmi(void)
454{ 452{
455 ignore_nmis--; 453 ignore_nmis--;
456 acpi_nmi_enable();
457} 454}
458 455
459/* May run on IST stack. */ 456/* May run on IST stack. */
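[Editor's note] The io_check_error() change above is easy to misread as a longer wait; it is not. 2000 iterations of udelay(1000) and 20000 iterations of udelay(100) are both two seconds, but the finer-grained loop lets touch_nmi_watchdog() run every 100 microseconds so the new NMI watchdog does not fire during the wait. Checking the arithmetic:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned long old_us = 2000UL * 1000;   /* old: 2000 iterations of udelay(1000) */
        unsigned long new_us = 20000UL * 100;   /* new: 20000 iterations of udelay(100) */

        assert(old_us == new_us);               /* total wait unchanged: 2 seconds */
        printf("both wait %lu us; the new loop feeds the watchdog every pass\n", new_us);
        return 0;
    }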
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0c40d8b72416..356a0d455cf9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void)
872 872
873 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 873 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
874 return 0; 874 return 0;
875
876 if (tsc_clocksource_reliable)
877 return 0;
875 /* 878 /*
876 * Intel systems are normally all synchronized. 879 * Intel systems are normally all synchronized.
877 * Exceptions must mark TSC as unstable: 880 * Exceptions must mark TSC as unstable:
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void)
879 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { 882 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
880 /* assume multi socket systems are not synchronized: */ 883 /* assume multi socket systems are not synchronized: */
881 if (num_possible_cpus() > 1) 884 if (num_possible_cpus() > 1)
882 tsc_unstable = 1; 885 return 1;
883 } 886 }
884 887
885 return tsc_unstable; 888 return 0;
889}
890
891
892static void tsc_refine_calibration_work(struct work_struct *work);
893static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
894/**
895 * tsc_refine_calibration_work - Further refine tsc freq calibration
 896 * @work: ignored.
897 *
 898 * This function uses delayed work over a period of a
899 * second to further refine the TSC freq value. Since this is
900 * timer based, instead of loop based, we don't block the boot
901 * process while this longer calibration is done.
902 *
 903 * If there are any calibration anomalies (too many SMIs, etc),
 904 * or the refined calibration is off by more than 1% from the fast early
905 * calibration, we throw out the new calibration and use the
906 * early calibration.
907 */
908static void tsc_refine_calibration_work(struct work_struct *work)
909{
910 static u64 tsc_start = -1, ref_start;
911 static int hpet;
912 u64 tsc_stop, ref_stop, delta;
913 unsigned long freq;
914
915 /* Don't bother refining TSC on unstable systems */
916 if (check_tsc_unstable())
917 goto out;
918
919 /*
920 * Since the work is started early in boot, we may be
921 * delayed the first time we expire. So set the workqueue
922 * again once we know timers are working.
923 */
924 if (tsc_start == -1) {
925 /*
926 * Only set hpet once, to avoid mixing hardware
927 * if the hpet becomes enabled later.
928 */
929 hpet = is_hpet_enabled();
930 schedule_delayed_work(&tsc_irqwork, HZ);
931 tsc_start = tsc_read_refs(&ref_start, hpet);
932 return;
933 }
934
935 tsc_stop = tsc_read_refs(&ref_stop, hpet);
936
 937 /* hpet or pmtimer available? */
938 if (!hpet && !ref_start && !ref_stop)
939 goto out;
940
 941 /* Check whether the sampling was disturbed by an SMI */
942 if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
943 goto out;
944
945 delta = tsc_stop - tsc_start;
946 delta *= 1000000LL;
947 if (hpet)
948 freq = calc_hpet_ref(delta, ref_start, ref_stop);
949 else
950 freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
951
952 /* Make sure we're within 1% */
953 if (abs(tsc_khz - freq) > tsc_khz/100)
954 goto out;
955
956 tsc_khz = freq;
957 printk(KERN_INFO "Refined TSC clocksource calibration: "
958 "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
959 (unsigned long)tsc_khz % 1000);
960
961out:
962 clocksource_register_khz(&clocksource_tsc, tsc_khz);
886} 963}
887 964
888static void __init init_tsc_clocksource(void) 965
966static int __init init_tsc_clocksource(void)
889{ 967{
968 if (!cpu_has_tsc || tsc_disabled > 0)
969 return 0;
970
890 if (tsc_clocksource_reliable) 971 if (tsc_clocksource_reliable)
891 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 972 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
892 /* lower the rating if we already know its unstable: */ 973 /* lower the rating if we already know its unstable: */
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void)
894 clocksource_tsc.rating = 0; 975 clocksource_tsc.rating = 0;
895 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 976 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
896 } 977 }
897 clocksource_register_khz(&clocksource_tsc, tsc_khz); 978 schedule_delayed_work(&tsc_irqwork, 0);
979 return 0;
898} 980}
981/*
982 * We use device_initcall here, to ensure we run after the hpet
983 * is fully initialized, which may occur at fs_initcall time.
984 */
985device_initcall(init_tsc_clocksource);
899 986
900void __init tsc_init(void) 987void __init tsc_init(void)
901{ 988{
@@ -949,6 +1036,5 @@ void __init tsc_init(void)
949 mark_tsc_unstable("TSCs unsynchronized"); 1036 mark_tsc_unstable("TSCs unsynchronized");
950 1037
951 check_system_tsc_reliable(); 1038 check_system_tsc_reliable();
952 init_tsc_clocksource();
953} 1039}
954 1040
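[Editor's note] The refinement added above boils down to: sample the TSC against the HPET or PM timer one second apart, convert the delta to kHz, and accept the refined value only if it lands within 1% of the fast early calibration. A standalone sketch of the acceptance test (the sample frequencies are invented):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        long tsc_khz = 2394456;     /* fast early calibration (made-up sample) */
        long refined = 2393990;     /* refined result after one second (made-up sample) */

        /* mirror of the acceptance test: keep the refined value only if within 1% */
        if (labs(tsc_khz - refined) > tsc_khz / 100)
            printf("discarding refined calibration, keeping %ld kHz\n", tsc_khz);
        else
            printf("Refined TSC clocksource calibration: %ld.%03ld MHz\n",
                   refined / 1000, refined % 1000);
        return 0;
    }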
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu.S
index 56a8c2a867d9..0edefc19a113 100644
--- a/arch/x86/kernel/verify_cpu_64.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -7,6 +7,7 @@
7 * Copyright (c) 2007 Andi Kleen (ak@suse.de) 7 * Copyright (c) 2007 Andi Kleen (ak@suse.de)
8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) 8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) 9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
10 * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com)
10 * 11 *
11 * This source code is licensed under the GNU General Public License, 12 * This source code is licensed under the GNU General Public License,
12 * Version 2. See the file COPYING for more details. 13 * Version 2. See the file COPYING for more details.
@@ -14,18 +15,17 @@
 14 * This is common code for verifying whether a CPU supports 15 * This is common code for verifying whether a CPU supports
 15 * long mode and SSE. It is not called directly; instead this 16 * long mode and SSE. It is not called directly; instead this
16 * file is included at various places and compiled in that context. 17 * file is included at various places and compiled in that context.
17 * Following are the current usage. 18 * This file is expected to run in 32bit code. Currently:
18 * 19 *
19 * This file is included by both 16bit and 32bit code. 20 * arch/x86/boot/compressed/head_64.S: Boot cpu verification
 21 * arch/x86/kernel/trampoline_64.S: secondary processor verification
22 * arch/x86/kernel/head_32.S: processor startup
20 * 23 *
21 * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) 24 * verify_cpu, returns the status of longmode and SSE in register %eax.
22 * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
 23 * arch/x86_64/kernel/trampoline.S: secondary processor verification (16bit)
24 * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
25 *
26 * verify_cpu, returns the status of cpu check in register %eax.
27 * 0: Success 1: Failure 25 * 0: Success 1: Failure
28 * 26 *
27 * On Intel, the XD_DISABLE flag will be cleared as a side-effect.
28 *
29 * The caller needs to check for the error code and take the action 29 * The caller needs to check for the error code and take the action
30 * appropriately. Either display a message or halt. 30 * appropriately. Either display a message or halt.
31 */ 31 */
@@ -62,8 +62,41 @@ verify_cpu:
62 cmpl $0x444d4163,%ecx 62 cmpl $0x444d4163,%ecx
63 jnz verify_cpu_noamd 63 jnz verify_cpu_noamd
64 mov $1,%di # cpu is from AMD 64 mov $1,%di # cpu is from AMD
65 jmp verify_cpu_check
65 66
66verify_cpu_noamd: 67verify_cpu_noamd:
68 cmpl $0x756e6547,%ebx # GenuineIntel?
69 jnz verify_cpu_check
70 cmpl $0x49656e69,%edx
71 jnz verify_cpu_check
72 cmpl $0x6c65746e,%ecx
73 jnz verify_cpu_check
74
75 # only call IA32_MISC_ENABLE when:
76 # family > 6 || (family == 6 && model >= 0xd)
77 movl $0x1, %eax # check CPU family and model
78 cpuid
79 movl %eax, %ecx
80
81 andl $0x0ff00f00, %eax # mask family and extended family
82 shrl $8, %eax
83 cmpl $6, %eax
84 ja verify_cpu_clear_xd # family > 6, ok
85 jb verify_cpu_check # family < 6, skip
86
87 andl $0x000f00f0, %ecx # mask model and extended model
88 shrl $4, %ecx
89 cmpl $0xd, %ecx
90 jb verify_cpu_check # family == 6, model < 0xd, skip
91
92verify_cpu_clear_xd:
93 movl $MSR_IA32_MISC_ENABLE, %ecx
94 rdmsr
95 btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE
96 jnc verify_cpu_check # only write MSR if bit was changed
97 wrmsr
98
99verify_cpu_check:
67 movl $0x1,%eax # Does the cpu have what it takes 100 movl $0x1,%eax # Does the cpu have what it takes
68 cpuid 101 cpuid
69 andl $REQUIRED_MASK0,%edx 102 andl $REQUIRED_MASK0,%edx
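[Editor's note] The new Intel branch gates the MSR write on CPU family/model, since the XD-disable control (bit 34 of IA32_MISC_ENABLE; btrl $2,%edx clears bit 2 of the high half) only exists from family 6, model 0xd onward. A C rendering of the mask-and-shift checks the assembly performs on CPUID leaf 1 (a sketch of the decode logic only):

    #include <stdio.h>

    /* mirrors verify_cpu's checks on CPUID leaf 1 (eax):
     * clear XD_DISABLE only when family > 6, or family == 6 and model >= 0xd */
    static int should_clear_xd(unsigned int eax)
    {
        unsigned int fam = (eax & 0x0ff00f00) >> 8; /* extended family | family */
        unsigned int mod = (eax & 0x000f00f0) >> 4; /* extended model  | model  */

        if (fam > 6)
            return 1;
        if (fam < 6)
            return 0;
        return mod >= 0xd;
    }

    int main(void)
    {
        printf("%d\n", should_clear_xd(0x000006d0)); /* family 6, model 0xd -> 1 */
        printf("%d\n", should_clear_xd(0x00000660)); /* family 6, model 0x6 -> 0 */
        printf("%d\n", should_clear_xd(0x00000f40)); /* family 0xf          -> 1 */
        return 0;
    }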
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e03530aebfd0..bf4700755184 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -69,7 +69,7 @@ jiffies_64 = jiffies;
69 69
70PHDRS { 70PHDRS {
71 text PT_LOAD FLAGS(5); /* R_E */ 71 text PT_LOAD FLAGS(5); /* R_E */
72 data PT_LOAD FLAGS(7); /* RWE */ 72 data PT_LOAD FLAGS(6); /* RW_ */
73#ifdef CONFIG_X86_64 73#ifdef CONFIG_X86_64
74 user PT_LOAD FLAGS(5); /* R_E */ 74 user PT_LOAD FLAGS(5); /* R_E */
75#ifdef CONFIG_SMP 75#ifdef CONFIG_SMP
@@ -116,6 +116,10 @@ SECTIONS
116 116
117 EXCEPTION_TABLE(16) :text = 0x9090 117 EXCEPTION_TABLE(16) :text = 0x9090
118 118
119#if defined(CONFIG_DEBUG_RODATA)
120 /* .text should occupy whole number of pages */
121 . = ALIGN(PAGE_SIZE);
122#endif
119 X64_ALIGN_DEBUG_RODATA_BEGIN 123 X64_ALIGN_DEBUG_RODATA_BEGIN
120 RO_DATA(PAGE_SIZE) 124 RO_DATA(PAGE_SIZE)
121 X64_ALIGN_DEBUG_RODATA_END 125 X64_ALIGN_DEBUG_RODATA_END
@@ -335,7 +339,7 @@ SECTIONS
335 __bss_start = .; 339 __bss_start = .;
336 *(.bss..page_aligned) 340 *(.bss..page_aligned)
337 *(.bss) 341 *(.bss)
338 . = ALIGN(4); 342 . = ALIGN(PAGE_SIZE);
339 __bss_stop = .; 343 __bss_stop = .;
340 } 344 }
341 345
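[Editor's note] PT_LOAD FLAGS() takes ELF p_flags bits: PF_X = 1, PF_W = 2, PF_R = 4. Dropping FLAGS(7) to FLAGS(6) for the data segment therefore removes execute permission while keeping read/write. A quick decoder:

    #include <stdio.h>

    #define PF_X 0x1    /* ELF p_flags bits */
    #define PF_W 0x2
    #define PF_R 0x4

    static void show(const char *name, unsigned int flags)
    {
        printf("%-4s %c%c%c\n", name,
               flags & PF_R ? 'R' : '_',
               flags & PF_W ? 'W' : '_',
               flags & PF_X ? 'E' : '_');
    }

    int main(void)
    {
        show("text", 5);    /* R_E */
        show("data", 6);    /* RW_: the fix drops execute from the data segment */
        return 0;
    }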
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index f628234fbeca..3cece05e4ac4 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -575,6 +575,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
575 s->pics[1].elcr_mask = 0xde; 575 s->pics[1].elcr_mask = 0xde;
576 s->pics[0].pics_state = s; 576 s->pics[0].pics_state = s;
577 s->pics[1].pics_state = s; 577 s->pics[1].pics_state = s;
578 s->pics[0].isr_ack = 0xff;
579 s->pics[1].isr_ack = 0xff;
578 580
579 /* 581 /*
580 * Initialize PIO device 582 * Initialize PIO device
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fb8b376bf28c..fbb04aee8301 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2394,7 +2394,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
2394 ASSERT(!VALID_PAGE(root)); 2394 ASSERT(!VALID_PAGE(root));
2395 spin_lock(&vcpu->kvm->mmu_lock); 2395 spin_lock(&vcpu->kvm->mmu_lock);
2396 kvm_mmu_free_some_pages(vcpu); 2396 kvm_mmu_free_some_pages(vcpu);
2397 sp = kvm_mmu_get_page(vcpu, i << 30, i << 30, 2397 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
2398 i << 30,
2398 PT32_ROOT_LEVEL, 1, ACC_ALL, 2399 PT32_ROOT_LEVEL, 1, ACC_ALL,
2399 NULL); 2400 NULL);
2400 root = __pa(sp->spt); 2401 root = __pa(sp->spt);
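[Editor's note] kvm_mmu_get_page() takes a gfn (guest frame number) as its second argument; the old code passed i << 30, which is a guest physical address, not a frame number. The fix shifts by (30 - PAGE_SHIFT) so the two agree:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned long i = 1;
        unsigned long gpa = i << 30;                    /* guest physical address: 1 GiB */
        unsigned long gfn = i << (30 - PAGE_SHIFT);     /* the matching frame number */

        /* the fix: kvm_mmu_get_page() takes a gfn, and gfn == gpa >> PAGE_SHIFT */
        printf("gpa=%#lx gfn=%#lx gpa>>PAGE_SHIFT=%#lx\n", gpa, gfn, gpa >> PAGE_SHIFT);
        return 0;
    }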
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 55543397a8a7..09df2f9a3d69 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,7 +23,7 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
23obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 23obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
24 24
25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o 25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
26obj-$(CONFIG_K8_NUMA) += k8topology_64.o 26obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
28 28
29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/amdtopology_64.c
index 804a3b6c6e14..51fae9cfdecb 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * AMD K8 NUMA support. 2 * AMD NUMA support.
3 * Discover the memory map and associated nodes. 3 * Discover the memory map and associated nodes.
4 * 4 *
5 * This version reads it directly from the K8 northbridge. 5 * This version reads it directly from the AMD northbridge.
6 * 6 *
7 * Copyright 2002,2003 Andi Kleen, SuSE Labs. 7 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
8 */ 8 */
@@ -57,7 +57,7 @@ static __init void early_get_boot_cpu_id(void)
57{ 57{
58 /* 58 /*
59 * need to get the APIC ID of the BSP so can use that to 59 * need to get the APIC ID of the BSP so can use that to
60 * create apicid_to_node in k8_scan_nodes() 60 * create apicid_to_node in amd_scan_nodes()
61 */ 61 */
62#ifdef CONFIG_X86_MPPARSE 62#ifdef CONFIG_X86_MPPARSE
63 /* 63 /*
@@ -69,7 +69,7 @@ static __init void early_get_boot_cpu_id(void)
69 early_init_lapic_mapping(); 69 early_init_lapic_mapping();
70} 70}
71 71
72int __init k8_get_nodes(struct bootnode *physnodes) 72int __init amd_get_nodes(struct bootnode *physnodes)
73{ 73{
74 int i; 74 int i;
75 int ret = 0; 75 int ret = 0;
@@ -82,7 +82,7 @@ int __init k8_get_nodes(struct bootnode *physnodes)
82 return ret; 82 return ret;
83} 83}
84 84
85int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) 85int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
86{ 86{
87 unsigned long start = PFN_PHYS(start_pfn); 87 unsigned long start = PFN_PHYS(start_pfn);
88 unsigned long end = PFN_PHYS(end_pfn); 88 unsigned long end = PFN_PHYS(end_pfn);
@@ -194,7 +194,7 @@ int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
194 return 0; 194 return 0;
195} 195}
196 196
197int __init k8_scan_nodes(void) 197int __init amd_scan_nodes(void)
198{ 198{
199 unsigned int bits; 199 unsigned int bits;
200 unsigned int cores; 200 unsigned int cores;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index c0e28a13de7d..947f42abe820 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
364 /* 364 /*
365 * We just marked the kernel text read only above, now that 365 * We just marked the kernel text read only above, now that
366 * we are going to free part of that, we need to make that 366 * we are going to free part of that, we need to make that
367 * writeable first. 367 * writeable and non-executable first.
368 */ 368 */
369 set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
369 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); 370 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
370 371
371 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); 372 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0e969f9f401b..f89b5bb4e93f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
226 226
227static inline int is_kernel_text(unsigned long addr) 227static inline int is_kernel_text(unsigned long addr)
228{ 228{
229 if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) 229 if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
230 return 1; 230 return 1;
231 return 0; 231 return 0;
232} 232}
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void)
912 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 912 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
913} 913}
914 914
915static void mark_nxdata_nx(void)
916{
917 /*
 918 * When this is called, init has already been executed and released,
 919 * so everything past _etext should be NX.
920 */
921 unsigned long start = PFN_ALIGN(_etext);
922 /*
 923 * The size ends at the is_kernel_text() upper limit, rounded up by HPAGE_SIZE where huge pages are used:
924 */
925 unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
926
927 if (__supported_pte_mask & _PAGE_NX)
928 printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
929 set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
930}
931
915void mark_rodata_ro(void) 932void mark_rodata_ro(void)
916{ 933{
917 unsigned long start = PFN_ALIGN(_text); 934 unsigned long start = PFN_ALIGN(_text);
@@ -946,6 +963,7 @@ void mark_rodata_ro(void)
946 printk(KERN_INFO "Testing CPA: write protecting again\n"); 963 printk(KERN_INFO "Testing CPA: write protecting again\n");
947 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 964 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
948#endif 965#endif
966 mark_nxdata_nx();
949} 967}
950#endif 968#endif
951 969
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8a436f..704a37cedddb 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
185 e->trace.entries = e->trace_entries; 185 e->trace.entries = e->trace_entries;
186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries); 186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
187 e->trace.skip = 0; 187 e->trace.skip = 0;
188 save_stack_trace_bp(&e->trace, regs->bp); 188 save_stack_trace_regs(&e->trace, regs);
189 189
190 /* Round address down to nearest 16 bytes */ 190 /* Round address down to nearest 16 bytes */
191 shadow_copy = kmemcheck_shadow_lookup(address 191 shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7ffc9b727efd..7762a517d69d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -264,7 +264,7 @@ static struct bootnode physnodes[MAX_NUMNODES] __initdata;
264static char *cmdline __initdata; 264static char *cmdline __initdata;
265 265
266static int __init setup_physnodes(unsigned long start, unsigned long end, 266static int __init setup_physnodes(unsigned long start, unsigned long end,
267 int acpi, int k8) 267 int acpi, int amd)
268{ 268{
269 int nr_nodes = 0; 269 int nr_nodes = 0;
270 int ret = 0; 270 int ret = 0;
@@ -274,13 +274,13 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
274 if (acpi) 274 if (acpi)
275 nr_nodes = acpi_get_nodes(physnodes); 275 nr_nodes = acpi_get_nodes(physnodes);
276#endif 276#endif
277#ifdef CONFIG_K8_NUMA 277#ifdef CONFIG_AMD_NUMA
278 if (k8) 278 if (amd)
279 nr_nodes = k8_get_nodes(physnodes); 279 nr_nodes = amd_get_nodes(physnodes);
280#endif 280#endif
281 /* 281 /*
282 * Basic sanity checking on the physical node map: there may be errors 282 * Basic sanity checking on the physical node map: there may be errors
283 * if the SRAT or K8 incorrectly reported the topology or the mem= 283 * if the SRAT or AMD code incorrectly reported the topology or the mem=
284 * kernel parameter is used. 284 * kernel parameter is used.
285 */ 285 */
286 for (i = 0; i < nr_nodes; i++) { 286 for (i = 0; i < nr_nodes; i++) {
@@ -549,7 +549,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
549 * numa=fake command-line option. 549 * numa=fake command-line option.
550 */ 550 */
551static int __init numa_emulation(unsigned long start_pfn, 551static int __init numa_emulation(unsigned long start_pfn,
552 unsigned long last_pfn, int acpi, int k8) 552 unsigned long last_pfn, int acpi, int amd)
553{ 553{
554 u64 addr = start_pfn << PAGE_SHIFT; 554 u64 addr = start_pfn << PAGE_SHIFT;
555 u64 max_addr = last_pfn << PAGE_SHIFT; 555 u64 max_addr = last_pfn << PAGE_SHIFT;
@@ -557,7 +557,7 @@ static int __init numa_emulation(unsigned long start_pfn,
557 int num_nodes; 557 int num_nodes;
558 int i; 558 int i;
559 559
560 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); 560 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
561 /* 561 /*
562 * If the numa=fake command-line contains a 'M' or 'G', it represents 562 * If the numa=fake command-line contains a 'M' or 'G', it represents
563 * the fixed node size. Otherwise, if it is just a single number N, 563 * the fixed node size. Otherwise, if it is just a single number N,
@@ -602,7 +602,7 @@ static int __init numa_emulation(unsigned long start_pfn,
602#endif /* CONFIG_NUMA_EMU */ 602#endif /* CONFIG_NUMA_EMU */
603 603
604void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, 604void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
605 int acpi, int k8) 605 int acpi, int amd)
606{ 606{
607 int i; 607 int i;
608 608
@@ -610,7 +610,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
610 nodes_clear(node_online_map); 610 nodes_clear(node_online_map);
611 611
612#ifdef CONFIG_NUMA_EMU 612#ifdef CONFIG_NUMA_EMU
613 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) 613 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
614 return; 614 return;
615 nodes_clear(node_possible_map); 615 nodes_clear(node_possible_map);
616 nodes_clear(node_online_map); 616 nodes_clear(node_online_map);
@@ -624,8 +624,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
624 nodes_clear(node_online_map); 624 nodes_clear(node_online_map);
625#endif 625#endif
626 626
627#ifdef CONFIG_K8_NUMA 627#ifdef CONFIG_AMD_NUMA
628 if (!numa_off && k8 && !k8_scan_nodes()) 628 if (!numa_off && amd && !amd_scan_nodes())
629 return; 629 return;
630 nodes_clear(node_possible_map); 630 nodes_clear(node_possible_map);
631 nodes_clear(node_online_map); 631 nodes_clear(node_online_map);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 532e7933d606..8b830ca14ac4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -13,6 +13,7 @@
13#include <linux/pfn.h> 13#include <linux/pfn.h>
14#include <linux/percpu.h> 14#include <linux/percpu.h>
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/pci.h>
16 17
17#include <asm/e820.h> 18#include <asm/e820.h>
18#include <asm/processor.h> 19#include <asm/processor.h>
@@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
255 unsigned long pfn) 256 unsigned long pfn)
256{ 257{
257 pgprot_t forbidden = __pgprot(0); 258 pgprot_t forbidden = __pgprot(0);
259 pgprot_t required = __pgprot(0);
258 260
259 /* 261 /*
260 * The BIOS area between 640k and 1Mb needs to be executable for 262 * The BIOS area between 640k and 1Mb needs to be executable for
261 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. 263 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
262 */ 264 */
263 if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) 265#ifdef CONFIG_PCI_BIOS
266 if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
264 pgprot_val(forbidden) |= _PAGE_NX; 267 pgprot_val(forbidden) |= _PAGE_NX;
268#endif
265 269
266 /* 270 /*
267 * The kernel text needs to be executable for obvious reasons 271 * The kernel text needs to be executable for obvious reasons
@@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
278 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, 282 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
279 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) 283 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
280 pgprot_val(forbidden) |= _PAGE_RW; 284 pgprot_val(forbidden) |= _PAGE_RW;
285 /*
286 * .data and .bss should always be writable.
287 */
288 if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
289 within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
290 pgprot_val(required) |= _PAGE_RW;
281 291
282#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) 292#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
283 /* 293 /*
@@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
317#endif 327#endif
318 328
319 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); 329 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
330 prot = __pgprot(pgprot_val(prot) | pgprot_val(required));
320 331
321 return prot; 332 return prot;
322} 333}
@@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
393{ 404{
394 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; 405 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
395 pte_t new_pte, old_pte, *tmp; 406 pte_t new_pte, old_pte, *tmp;
396 pgprot_t old_prot, new_prot; 407 pgprot_t old_prot, new_prot, req_prot;
397 int i, do_split = 1; 408 int i, do_split = 1;
398 unsigned int level; 409 unsigned int level;
399 410
@@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
438 * We are safe now. Check whether the new pgprot is the same: 449 * We are safe now. Check whether the new pgprot is the same:
439 */ 450 */
440 old_pte = *kpte; 451 old_pte = *kpte;
441 old_prot = new_prot = pte_pgprot(old_pte); 452 old_prot = new_prot = req_prot = pte_pgprot(old_pte);
442 453
443 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); 454 pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
444 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); 455 pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
445 456
446 /* 457 /*
447 * old_pte points to the large page base address. So we need 458 * old_pte points to the large page base address. So we need
@@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
450 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); 461 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
451 cpa->pfn = pfn; 462 cpa->pfn = pfn;
452 463
453 new_prot = static_protections(new_prot, address, pfn); 464 new_prot = static_protections(req_prot, address, pfn);
454 465
455 /* 466 /*
456 * We need to check the full range, whether 467 * We need to check the full range, whether
457 * static_protection() requires a different pgprot for one of 468 * static_protection() requires a different pgprot for one of
458 * the pages in the range we try to preserve: 469 * the pages in the range we try to preserve:
459 */ 470 */
460 addr = address + PAGE_SIZE; 471 addr = address & pmask;
461 pfn++; 472 pfn = pte_pfn(old_pte);
462 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { 473 for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
463 pgprot_t chk_prot = static_protections(new_prot, addr, pfn); 474 pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
464 475
465 if (pgprot_val(chk_prot) != pgprot_val(new_prot)) 476 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
466 goto out_unlock; 477 goto out_unlock;
@@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
483 * that we limited the number of possible pages already to 494 * that we limited the number of possible pages already to
484 * the number of pages in the large page. 495 * the number of pages in the large page.
485 */ 496 */
486 if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { 497 if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
487 /* 498 /*
488 * The address is aligned and the number of pages 499 * The address is aligned and the number of pages
489 * covers the full page. 500 * covers the full page.
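[Editor's note] The static_protections() recheck above used to start at the faulting address and walk only cpa->numpages pages; the fixed loop rescans the entire large page from its aligned base, so a protection conflict anywhere in the 2M/1G mapping forces a split. A sketch of the new loop bounds (addresses invented):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
        unsigned long psize   = 1UL << 21;          /* a 2 MiB large page */
        unsigned long pmask   = ~(psize - 1);
        unsigned long address = 0x40025000UL;       /* somewhere inside that page */

        unsigned long addr    = address & pmask;    /* fix: rescan from the page base... */
        unsigned long npages  = psize >> PAGE_SHIFT;/* ...across all 512 subpages */

        printf("scan %lu pages from %#lx (previously: from %#lx onward)\n",
               npages, addr, address + PAGE_SIZE);
        return 0;
    }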
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index a3250aa34086..410531d3c292 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -41,7 +41,7 @@ void __init x86_report_nx(void)
41{ 41{
42 if (!cpu_has_nx) { 42 if (!cpu_has_nx) {
43 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " 43 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
44 "missing in CPU or disabled in BIOS!\n"); 44 "missing in CPU!\n");
45 } else { 45 } else {
46#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 46#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
47 if (disable_nx) { 47 if (disable_nx) {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index a17dffd136c1..f16434568a51 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
92 /* mark this node as "seen" in node bitmap */ 92 /* mark this node as "seen" in node bitmap */
93 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); 93 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
94 94
95 /* don't need to check apic_id here, because it is always 8 bits */
95 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; 96 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
96 97
97 printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", 98 printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a35cb9d8b060..171a0aacb99a 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
134 } 134 }
135 135
136 apic_id = pa->apic_id; 136 apic_id = pa->apic_id;
137 if (apic_id >= MAX_LOCAL_APIC) {
138 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
139 return;
140 }
137 apicid_to_node[apic_id] = node; 141 apicid_to_node[apic_id] = node;
138 node_set(node, cpu_nodes_parsed); 142 node_set(node, cpu_nodes_parsed);
139 acpi_numa = 1; 143 acpi_numa = 1;
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
168 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; 172 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
169 else 173 else
170 apic_id = pa->apic_id; 174 apic_id = pa->apic_id;
175
176 if (apic_id >= MAX_LOCAL_APIC) {
177 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
178 return;
179 }
180
171 apicid_to_node[apic_id] = node; 181 apicid_to_node[apic_id] = node;
172 node_set(node, cpu_nodes_parsed); 182 node_set(node, cpu_nodes_parsed);
173 acpi_numa = 1; 183 acpi_numa = 1;
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..72cbec14d783 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
126 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
127 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
128 if (depth) 128 if (depth)
129 dump_trace(NULL, regs, (unsigned long *)stack, 0, 129 dump_trace(NULL, regs, (unsigned long *)stack,
130 &backtrace_ops, &depth); 130 &backtrace_ops, &depth);
131 return; 131 return;
132 } 132 }
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 4e8baad36d37..358c8b9c96a7 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -732,6 +732,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
732 case 0x14: 732 case 0x14:
733 cpu_type = "x86-64/family14h"; 733 cpu_type = "x86-64/family14h";
734 break; 734 break;
735 case 0x15:
736 cpu_type = "x86-64/family15h";
737 break;
735 default: 738 default:
736 return -ENODEV; 739 return -ENODEV;
737 } 740 }
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index e3ecb71b5790..0636dd93cef8 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -58,9 +58,6 @@ static void timer_stop(void)
58 58
59int __init op_nmi_timer_init(struct oprofile_operations *ops) 59int __init op_nmi_timer_init(struct oprofile_operations *ops)
60{ 60{
61 if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
62 return -ENODEV;
63
64 ops->start = timer_start; 61 ops->start = timer_start;
65 ops->stop = timer_stop; 62 ops->stop = timer_stop;
66 ops->cpu_type = "timer"; 63 ops->cpu_type = "timer";
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index a011bcc0f943..c3b8e24f2b16 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -29,11 +29,12 @@
29#include "op_x86_model.h" 29#include "op_x86_model.h"
30#include "op_counter.h" 30#include "op_counter.h"
31 31
32#define NUM_COUNTERS 4 32#define NUM_COUNTERS 4
33#define NUM_COUNTERS_F15H 6
33#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 34#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
34#define NUM_VIRT_COUNTERS 32 35#define NUM_VIRT_COUNTERS 32
35#else 36#else
36#define NUM_VIRT_COUNTERS NUM_COUNTERS 37#define NUM_VIRT_COUNTERS 0
37#endif 38#endif
38 39
39#define OP_EVENT_MASK 0x0FFF 40#define OP_EVENT_MASK 0x0FFF
@@ -41,7 +42,8 @@
41 42
42#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) 43#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
43 44
44static unsigned long reset_value[NUM_VIRT_COUNTERS]; 45static int num_counters;
46static unsigned long reset_value[OP_MAX_COUNTER];
45 47
46#define IBS_FETCH_SIZE 6 48#define IBS_FETCH_SIZE 6
47#define IBS_OP_SIZE 12 49#define IBS_OP_SIZE 12
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
387 int i; 389 int i;
388 390
389 /* enable active counters */ 391 /* enable active counters */
390 for (i = 0; i < NUM_COUNTERS; ++i) { 392 for (i = 0; i < num_counters; ++i) {
391 int virt = op_x86_phys_to_virt(i); 393 int virt = op_x86_phys_to_virt(i);
392 if (!reset_value[virt]) 394 if (!reset_value[virt])
393 continue; 395 continue;
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
406{ 408{
407 int i; 409 int i;
408 410
409 for (i = 0; i < NUM_COUNTERS; ++i) { 411 for (i = 0; i < num_counters; ++i) {
410 if (!msrs->counters[i].addr) 412 if (!msrs->counters[i].addr)
411 continue; 413 continue;
412 release_perfctr_nmi(MSR_K7_PERFCTR0 + i); 414 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
418{ 420{
419 int i; 421 int i;
420 422
421 for (i = 0; i < NUM_COUNTERS; i++) { 423 for (i = 0; i < num_counters; i++) {
422 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 424 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
423 goto fail; 425 goto fail;
424 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { 426 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
426 goto fail; 428 goto fail;
427 } 429 }
428 /* both registers must be reserved */ 430 /* both registers must be reserved */
429 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; 431 if (num_counters == NUM_COUNTERS_F15H) {
430 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; 432 msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
433 msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
434 } else {
435 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
436 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
437 }
431 continue; 438 continue;
432 fail: 439 fail:
433 if (!counter_config[i].enabled) 440 if (!counter_config[i].enabled)
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
447 int i; 454 int i;
448 455
449 /* setup reset_value */ 456 /* setup reset_value */
450 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { 457 for (i = 0; i < OP_MAX_COUNTER; ++i) {
451 if (counter_config[i].enabled 458 if (counter_config[i].enabled
452 && msrs->counters[op_x86_virt_to_phys(i)].addr) 459 && msrs->counters[op_x86_virt_to_phys(i)].addr)
453 reset_value[i] = counter_config[i].count; 460 reset_value[i] = counter_config[i].count;
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
456 } 463 }
457 464
458 /* clear all counters */ 465 /* clear all counters */
459 for (i = 0; i < NUM_COUNTERS; ++i) { 466 for (i = 0; i < num_counters; ++i) {
460 if (!msrs->controls[i].addr) 467 if (!msrs->controls[i].addr)
461 continue; 468 continue;
462 rdmsrl(msrs->controls[i].addr, val); 469 rdmsrl(msrs->controls[i].addr, val);
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
472 } 479 }
473 480
474 /* enable active counters */ 481 /* enable active counters */
475 for (i = 0; i < NUM_COUNTERS; ++i) { 482 for (i = 0; i < num_counters; ++i) {
476 int virt = op_x86_phys_to_virt(i); 483 int virt = op_x86_phys_to_virt(i);
477 if (!reset_value[virt]) 484 if (!reset_value[virt])
478 continue; 485 continue;
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
503 u64 val; 510 u64 val;
504 int i; 511 int i;
505 512
506 for (i = 0; i < NUM_COUNTERS; ++i) { 513 for (i = 0; i < num_counters; ++i) {
507 int virt = op_x86_phys_to_virt(i); 514 int virt = op_x86_phys_to_virt(i);
508 if (!reset_value[virt]) 515 if (!reset_value[virt])
509 continue; 516 continue;
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
526 u64 val; 533 u64 val;
527 int i; 534 int i;
528 535
529 for (i = 0; i < NUM_COUNTERS; ++i) { 536 for (i = 0; i < num_counters; ++i) {
530 if (!reset_value[op_x86_phys_to_virt(i)]) 537 if (!reset_value[op_x86_phys_to_virt(i)])
531 continue; 538 continue;
532 rdmsrl(msrs->controls[i].addr, val); 539 rdmsrl(msrs->controls[i].addr, val);
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
546 * Subtle: stop on all counters to avoid race with setting our 553 * Subtle: stop on all counters to avoid race with setting our
547 * pm callback 554 * pm callback
548 */ 555 */
549 for (i = 0; i < NUM_COUNTERS; ++i) { 556 for (i = 0; i < num_counters; ++i) {
550 if (!reset_value[op_x86_phys_to_virt(i)]) 557 if (!reset_value[op_x86_phys_to_virt(i)])
551 continue; 558 continue;
552 rdmsrl(msrs->controls[i].addr, val); 559 rdmsrl(msrs->controls[i].addr, val);
@@ -603,6 +610,7 @@ static int force_ibs_eilvt_setup(void)
603 ret = setup_ibs_ctl(i); 610 ret = setup_ibs_ctl(i);
604 if (ret) 611 if (ret)
605 return ret; 612 return ret;
613 pr_err(FW_BUG "using offset %d for IBS interrupts\n", i);
606 return 0; 614 return 0;
607 } 615 }
608 616
@@ -630,21 +638,29 @@ static int __init_ibs_nmi(void)
630 return 0; 638 return 0;
631} 639}
632 640
633/* initialize the APIC for the IBS interrupts if available */ 641/*
642 * check and reserve APIC extended interrupt LVT offset for IBS if
643 * available
644 *
 645 * init_ibs() performs implicitly CPU-local operations, so pin this
646 * thread to its current CPU
647 */
648
634static void init_ibs(void) 649static void init_ibs(void)
635{ 650{
636 ibs_caps = get_ibs_caps(); 651 preempt_disable();
637 652
653 ibs_caps = get_ibs_caps();
638 if (!ibs_caps) 654 if (!ibs_caps)
639 return; 655 goto out;
640 656
641 if (__init_ibs_nmi()) { 657 if (__init_ibs_nmi() < 0)
642 ibs_caps = 0; 658 ibs_caps = 0;
643 return; 659 else
644 } 660 printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
645 661
646 printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", 662out:
647 (unsigned)ibs_caps); 663 preempt_enable();
648} 664}
649 665
650static int (*create_arch_files)(struct super_block *sb, struct dentry *root); 666static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
@@ -698,18 +714,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
698 return 0; 714 return 0;
699} 715}
700 716
717struct op_x86_model_spec op_amd_spec;
718
701static int op_amd_init(struct oprofile_operations *ops) 719static int op_amd_init(struct oprofile_operations *ops)
702{ 720{
703 init_ibs(); 721 init_ibs();
704 create_arch_files = ops->create_files; 722 create_arch_files = ops->create_files;
705 ops->create_files = setup_ibs_files; 723 ops->create_files = setup_ibs_files;
724
725 if (boot_cpu_data.x86 == 0x15) {
726 num_counters = NUM_COUNTERS_F15H;
727 } else {
728 num_counters = NUM_COUNTERS;
729 }
730
731 op_amd_spec.num_counters = num_counters;
732 op_amd_spec.num_controls = num_counters;
733 op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
734
706 return 0; 735 return 0;
707} 736}
708 737
709struct op_x86_model_spec op_amd_spec = { 738struct op_x86_model_spec op_amd_spec = {
710 .num_counters = NUM_COUNTERS, 739 /* num_counters/num_controls filled in at runtime */
711 .num_controls = NUM_COUNTERS,
712 .num_virt_counters = NUM_VIRT_COUNTERS,
713 .reserved = MSR_AMD_EVENTSEL_RESERVED, 740 .reserved = MSR_AMD_EVENTSEL_RESERVED,
714 .event_mask = OP_EVENT_MASK, 741 .event_mask = OP_EVENT_MASK,
715 .init = op_amd_init, 742 .init = op_amd_init,
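
The hunk above moves the counter geometry of op_amd_spec from compile time into op_amd_init(), keyed off the CPU family. A minimal userspace C sketch of the same pattern; struct pmu_spec and detect_family() are hypothetical stand-ins for op_x86_model_spec and boot_cpu_data.x86, assuming family 0x15 exposes six counters and older families four:

#include <stdio.h>

#define NUM_COUNTERS		4	/* pre-family-15h AMD PMU */
#define NUM_COUNTERS_F15H	6	/* family 15h core PMU */

struct pmu_spec {
	int num_counters;
	int num_controls;
};

/* hypothetical stand-in for reading boot_cpu_data.x86 */
static int detect_family(void)
{
	return 0x15;
}

int main(void)
{
	struct pmu_spec spec;	/* filled in at runtime, as in the patch */
	int num = (detect_family() == 0x15) ? NUM_COUNTERS_F15H
					    : NUM_COUNTERS;

	spec.num_counters = num;
	spec.num_controls = num;
	printf("using %d counters\n", spec.num_counters);
	return 0;
}
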
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 182558dd5515..9fadec074142 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -11,7 +11,7 @@
11#include <linux/oprofile.h> 11#include <linux/oprofile.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/ptrace.h> 13#include <linux/ptrace.h>
14#include <linux/nmi.h> 14#include <asm/nmi.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/apic.h> 17#include <asm/apic.h>
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index effd96e33f16..6b8759f7634e 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_PCI_OLPC) += olpc.o
7obj-$(CONFIG_PCI_XEN) += xen.o 7obj-$(CONFIG_PCI_XEN) += xen.o
8 8
9obj-y += fixup.o 9obj-y += fixup.o
10obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
10obj-$(CONFIG_ACPI) += acpi.o 11obj-$(CONFIG_ACPI) += acpi.o
11obj-y += legacy.o irq.o 12obj-y += legacy.o irq.o
12 13
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
new file mode 100644
index 000000000000..85b68ef5e809
--- /dev/null
+++ b/arch/x86/pci/ce4100.c
@@ -0,0 +1,315 @@
1/*
2 * GPL LICENSE SUMMARY
3 *
4 * Copyright(c) 2010 Intel Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of version 2 of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 * The full GNU General Public License is included in this distribution
19 * in the file called LICENSE.GPL.
20 *
21 * Contact Information:
22 * Intel Corporation
23 * 2200 Mission College Blvd.
24 * Santa Clara, CA 97052
25 *
26 * This provides access methods for PCI registers that mis-behave on
27 * the CE4100. Each register can be assigned a private init, read and
28 * write routine. The exception to this is the bridge device. The
29 * bridge device is the only device on bus zero (0) that requires any
30 * fixup so it is a special case ATM
31 */
32
33#include <linux/kernel.h>
34#include <linux/pci.h>
35#include <linux/init.h>
36
37#include <asm/pci_x86.h>
38
39struct sim_reg {
40 u32 value;
41 u32 mask;
42};
43
44struct sim_dev_reg {
45 int dev_func;
46 int reg;
47 void (*init)(struct sim_dev_reg *reg);
48 void (*read)(struct sim_dev_reg *reg, u32 *value);
49 void (*write)(struct sim_dev_reg *reg, u32 value);
50 struct sim_reg sim_reg;
51};
52
53struct sim_reg_op {
54 void (*init)(struct sim_dev_reg *reg);
55 void (*read)(struct sim_dev_reg *reg, u32 value);
56 void (*write)(struct sim_dev_reg *reg, u32 value);
57};
58
59#define MB (1024 * 1024)
60#define KB (1024)
61#define SIZE_TO_MASK(size) (~(size - 1))
62
63#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\
64{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\
65 {0, SIZE_TO_MASK(size)} },
66
67static void reg_init(struct sim_dev_reg *reg)
68{
69 pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4,
70 &reg->sim_reg.value);
71}
72
73static void reg_read(struct sim_dev_reg *reg, u32 *value)
74{
75 unsigned long flags;
76
77 raw_spin_lock_irqsave(&pci_config_lock, flags);
78 *value = reg->sim_reg.value;
79 raw_spin_unlock_irqrestore(&pci_config_lock, flags);
80}
81
82static void reg_write(struct sim_dev_reg *reg, u32 value)
83{
84 unsigned long flags;
85
86 raw_spin_lock_irqsave(&pci_config_lock, flags);
87 reg->sim_reg.value = (value & reg->sim_reg.mask) |
88 (reg->sim_reg.value & ~reg->sim_reg.mask);
89 raw_spin_unlock_irqrestore(&pci_config_lock, flags);
90}
91
92static void sata_reg_init(struct sim_dev_reg *reg)
93{
94 pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4,
95 &reg->sim_reg.value);
96 reg->sim_reg.value += 0x400;
97}
98
99static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value)
100{
101 reg_read(reg, value);
102 if (*value != reg->sim_reg.mask)
103 *value |= 0x100;
104}
105
106static void sata_revid_init(struct sim_dev_reg *reg)
107{
108 reg->sim_reg.value = 0x01060100;
109 reg->sim_reg.mask = 0;
110}
111
112static void sata_revid_read(struct sim_dev_reg *reg, u32 *value)
113{
114 reg_read(reg, value);
115}
116
117static struct sim_dev_reg bus1_fixups[] = {
118 DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write)
119 DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write)
120 DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
121 DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
122 DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
123 DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write)
124 DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write)
125 DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write)
126 DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
127 DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write)
128 DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
129 DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
 130 DEFINE_REG(9, 0, 0x10, (1*MB), reg_init, reg_read, reg_write)
131 DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
132 DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write)
133 DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write)
134 DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write)
135 DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write)
136 DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write)
137 DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write)
138 DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write)
139 DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write)
140 DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write)
141 DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write)
142 DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write)
143 DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write)
144 DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write)
145 DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write)
146 DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
147 DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write)
148 DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write)
149 DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
150 DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
151 DEFINE_REG(14, 0, 0x8, 0, sata_revid_init, sata_revid_read, 0)
152 DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write)
153 DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write)
154 DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write)
155 DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write)
156 DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write)
157 DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write)
158 DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
159 DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
160 DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
161 DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write)
162 DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write)
163 DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
164 DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write)
165};
166
167static void __init init_sim_regs(void)
168{
169 int i;
170
171 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
172 if (bus1_fixups[i].init)
173 bus1_fixups[i].init(&bus1_fixups[i]);
174 }
175}
176
177static inline void extract_bytes(u32 *value, int reg, int len)
178{
179 uint32_t mask;
180
181 *value >>= ((reg & 3) * 8);
182 mask = 0xFFFFFFFF >> ((4 - len) * 8);
183 *value &= mask;
184}
185
186static int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
187{
188 u32 av_bridge_base, av_bridge_limit;
189 int retval = 0;
190
191 switch (reg) {
192 /* Make BARs appear to not request any memory. */
193 case PCI_BASE_ADDRESS_0:
194 case PCI_BASE_ADDRESS_0 + 1:
195 case PCI_BASE_ADDRESS_0 + 2:
196 case PCI_BASE_ADDRESS_0 + 3:
197 *value = 0;
198 break;
199
 200 /* The subordinate bus number register is hardwired
 201 * to zero and read-only, so simulate it.
 202 */
203 case PCI_PRIMARY_BUS:
204 if (len == 4)
205 *value = 0x00010100;
206 break;
207
208 case PCI_SUBORDINATE_BUS:
209 *value = 1;
210 break;
211
212 case PCI_MEMORY_BASE:
213 case PCI_MEMORY_LIMIT:
214 /* Get the A/V bridge base address. */
215 pci_direct_conf1.read(0, 0, devfn,
216 PCI_BASE_ADDRESS_0, 4, &av_bridge_base);
217
218 av_bridge_limit = av_bridge_base + (512*MB - 1);
219 av_bridge_limit >>= 16;
220 av_bridge_limit &= 0xFFF0;
221
222 av_bridge_base >>= 16;
223 av_bridge_base &= 0xFFF0;
224
225 if (reg == PCI_MEMORY_LIMIT)
226 *value = av_bridge_limit;
227 else if (len == 2)
228 *value = av_bridge_base;
229 else
230 *value = (av_bridge_limit << 16) | av_bridge_base;
231 break;
232 /* Make prefetchable memory limit smaller than prefetchable
 233 * memory base, so it does not claim prefetchable memory space.
234 */
235 case PCI_PREF_MEMORY_BASE:
236 *value = 0xFFF0;
237 break;
238 case PCI_PREF_MEMORY_LIMIT:
239 *value = 0x0;
240 break;
 241 /* Make IO limit smaller than IO base, so it does not claim IO space. */
242 case PCI_IO_BASE:
243 *value = 0xF0;
244 break;
245 case PCI_IO_LIMIT:
246 *value = 0;
247 break;
248 default:
249 retval = 1;
250 }
251 return retval;
252}
253
254static int ce4100_conf_read(unsigned int seg, unsigned int bus,
255 unsigned int devfn, int reg, int len, u32 *value)
256{
257 int i, retval = 1;
258
259 if (bus == 1) {
260 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
261 if (bus1_fixups[i].dev_func == devfn &&
262 bus1_fixups[i].reg == (reg & ~3) &&
263 bus1_fixups[i].read) {
264 bus1_fixups[i].read(&(bus1_fixups[i]),
265 value);
266 extract_bytes(value, reg, len);
267 return 0;
268 }
269 }
270 }
271
272 if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) &&
273 !bridge_read(devfn, reg, len, value))
274 return 0;
275
276 return pci_direct_conf1.read(seg, bus, devfn, reg, len, value);
277}
278
279static int ce4100_conf_write(unsigned int seg, unsigned int bus,
280 unsigned int devfn, int reg, int len, u32 value)
281{
282 int i;
283
284 if (bus == 1) {
285 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
286 if (bus1_fixups[i].dev_func == devfn &&
287 bus1_fixups[i].reg == (reg & ~3) &&
288 bus1_fixups[i].write) {
289 bus1_fixups[i].write(&(bus1_fixups[i]),
290 value);
291 return 0;
292 }
293 }
294 }
295
296 /* Discard writes to A/V bridge BAR. */
297 if (bus == 0 && PCI_DEVFN(1, 0) == devfn &&
298 ((reg & ~3) == PCI_BASE_ADDRESS_0))
299 return 0;
300
301 return pci_direct_conf1.write(seg, bus, devfn, reg, len, value);
302}
303
304struct pci_raw_ops ce4100_pci_conf = {
305 .read = ce4100_conf_read,
306 .write = ce4100_conf_write,
307};
308
309static int __init ce4100_pci_init(void)
310{
311 init_sim_regs();
312 raw_pci_ops = &ce4100_pci_conf;
313 return 0;
314}
315subsys_initcall(ce4100_pci_init);
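
The simulated-register scheme above hinges on two lines: SIZE_TO_MASK(size) turns a BAR size into its writable-bits mask, and reg_write() merges writes under that mask, so a BAR-sizing write of 0xFFFFFFFF reads back as the mask, just like real hardware. A standalone userspace C sketch of just that masking logic (sim_reg here mirrors the structure above, but nothing else from the file is assumed):

#include <stdio.h>
#include <stdint.h>

#define SIZE_TO_MASK(size) (~((size) - 1))

struct sim_reg {
	uint32_t value;
	uint32_t mask;
};

/* merge a config write under the register's writable-bit mask */
static void sim_write(struct sim_reg *r, uint32_t value)
{
	r->value = (value & r->mask) | (r->value & ~r->mask);
}

int main(void)
{
	/* a 64 KB BAR currently programmed at 0xdffe0000 */
	struct sim_reg bar = { 0xdffe0000, SIZE_TO_MASK(64 * 1024) };

	sim_write(&bar, 0xFFFFFFFF);	/* BAR sizing probe */
	printf("after sizing:  0x%08x\n", (unsigned)bar.value);	/* 0xffff0000 */

	sim_write(&bar, 0xdffc0000);	/* reprogram the base */
	printf("after rewrite: 0x%08x\n", (unsigned)bar.value);	/* 0xdffc0000 */
	return 0;
}
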
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 2492d165096a..a5f7d0d63de0 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -9,6 +9,7 @@
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <asm/pci_x86.h> 10#include <asm/pci_x86.h>
11#include <asm/pci-functions.h> 11#include <asm/pci-functions.h>
12#include <asm/cacheflush.h>
12 13
13/* BIOS32 signature: "_32_" */ 14/* BIOS32 signature: "_32_" */
14#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) 15#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
@@ -25,6 +26,27 @@
25#define PCIBIOS_HW_TYPE1_SPEC 0x10 26#define PCIBIOS_HW_TYPE1_SPEC 0x10
26#define PCIBIOS_HW_TYPE2_SPEC 0x20 27#define PCIBIOS_HW_TYPE2_SPEC 0x20
27 28
29int pcibios_enabled;
30
31/* According to the BIOS specification at:
32 * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could
 33 * restrict the x zone to some pages and make it read-only. But this may
 34 * be broken on some BIOSes and is complex to handle with static_protections.
 35 * We could make the 0xe0000-0x100000 range rox, but this can break
 36 * some ISA mappings.
 37 *
 38 * So we leave an rw and x hole when pcibios is used. This shouldn't
 39 * happen on modern systems with mmconfig, and if you don't want it
 40 * you can disable pcibios...
41 */
42static inline void set_bios_x(void)
43{
44 pcibios_enabled = 1;
45 set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);
46 if (__supported_pte_mask & _PAGE_NX)
 47 printk(KERN_INFO "PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n");
48}
49
28/* 50/*
29 * This is the standard structure used to identify the entry point 51 * This is the standard structure used to identify the entry point
30 * to the BIOS32 Service Directory, as documented in 52 * to the BIOS32 Service Directory, as documented in
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void)
332 DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", 354 DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",
333 bios32_entry); 355 bios32_entry);
334 bios32_indirect.address = bios32_entry + PAGE_OFFSET; 356 bios32_indirect.address = bios32_entry + PAGE_OFFSET;
357 set_bios_x();
335 if (check_pcibios()) 358 if (check_pcibios())
336 return &pci_bios_access; 359 return &pci_bios_access;
337 } 360 }
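
set_bios_x() above marks the legacy BIOS range executable by page count. A small sketch of the address-to-pages arithmetic it relies on, assuming the conventional 0xa0000-0x100000 window and 4 KiB pages (the BIOS_BEGIN/BIOS_END values here are illustrative, not taken from the patch):

#include <stdio.h>

#define PAGE_SHIFT	12
#define BIOS_BEGIN	0x000a0000UL	/* illustrative legacy window */
#define BIOS_END	0x00100000UL

int main(void)
{
	unsigned long pages = (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT;

	/* 0x60000 bytes / 4 KiB = 96 pages get the X permission */
	printf("%lu pages\n", pages);
	return 0;
}
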
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 7bf70b812fa2..021eee91c056 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -1,5 +1,7 @@
1# Platform specific code goes here 1# Platform specific code goes here
2obj-y += ce4100/
2obj-y += efi/ 3obj-y += efi/
4obj-y += iris/
3obj-y += mrst/ 5obj-y += mrst/
4obj-y += olpc/ 6obj-y += olpc/
5obj-y += scx200/ 7obj-y += scx200/
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile
new file mode 100644
index 000000000000..91fc92971d94
--- /dev/null
+++ b/arch/x86/platform/ce4100/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
new file mode 100644
index 000000000000..d2c0d51a7178
--- /dev/null
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -0,0 +1,132 @@
1/*
2 * Intel CE4100 platform specific setup code
3 *
4 * (C) Copyright 2010 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/irq.h>
14#include <linux/module.h>
15#include <linux/serial_reg.h>
16#include <linux/serial_8250.h>
17
18#include <asm/setup.h>
19#include <asm/io.h>
20
21static int ce4100_i8042_detect(void)
22{
23 return 0;
24}
25
26static void __init sdv_find_smp_config(void)
27{
28}
29
30#ifdef CONFIG_SERIAL_8250
31
32
33static unsigned int mem_serial_in(struct uart_port *p, int offset)
34{
35 offset = offset << p->regshift;
36 return readl(p->membase + offset);
37}
38
 39/*
 40 * The UART Tx interrupts are not raised under some conditions and serial
 41 * transmission therefore hangs. This is a silicon issue that has not been
 42 * root caused. The workaround checks the UART_LSR_THRE and UART_LSR_TEMT
 43 * bits of the LSR register in the interrupt handler to see whether at least
 44 * one of the two is set; if so, the transmit request is processed. Without
 45 * this workaround, serial transmission may hang. This workaround is for
 46 * errata number 9 in Errata - B step.
 47 */
48
49static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset)
50{
51 unsigned int ret, ier, lsr;
52
53 if (offset == UART_IIR) {
54 offset = offset << p->regshift;
55 ret = readl(p->membase + offset);
56 if (ret & UART_IIR_NO_INT) {
 57 /* see if the TX interrupt should really have been set */
58 ier = mem_serial_in(p, UART_IER);
59 /* see if the UART's XMIT interrupt is enabled */
60 if (ier & UART_IER_THRI) {
61 lsr = mem_serial_in(p, UART_LSR);
62 /* now check to see if the UART should be
63 generating an interrupt (but isn't) */
64 if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
65 ret &= ~UART_IIR_NO_INT;
66 }
67 }
68 } else
69 ret = mem_serial_in(p, offset);
70 return ret;
71}
72
73static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
74{
75 offset = offset << p->regshift;
76 writel(value, p->membase + offset);
77}
78
79static void ce4100_serial_fixup(int port, struct uart_port *up,
80 unsigned short *capabilites)
81{
82#ifdef CONFIG_EARLY_PRINTK
83 /*
 84 * Override the legacy port configuration that comes from
 85 * asm/serial.h. Using the ioport driver and then switching to the
 86 * PCI memory-mapped driver hangs the IOAPIC.
87 */
88 if (up->iotype != UPIO_MEM32) {
89 up->uartclk = 14745600;
90 up->mapbase = 0xdffe0200;
91 set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
92 up->mapbase & PAGE_MASK);
93 up->membase =
94 (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
95 up->membase += up->mapbase & ~PAGE_MASK;
96 up->iotype = UPIO_MEM32;
97 up->regshift = 2;
98 }
99#endif
100 up->iobase = 0;
101 up->serial_in = ce4100_mem_serial_in;
102 up->serial_out = ce4100_mem_serial_out;
103
104 *capabilites |= (1 << 12);
105}
106
107static __init void sdv_serial_fixup(void)
108{
109 serial8250_set_isa_configurator(ce4100_serial_fixup);
110}
111
112#else
113static inline void sdv_serial_fixup(void) { }
114#endif
115
116static void __init sdv_arch_setup(void)
117{
118 sdv_serial_fixup();
119}
120
121/*
122 * CE4100 specific x86_init function overrides and early setup
123 * calls.
124 */
125void __init x86_ce4100_early_setup(void)
126{
127 x86_init.oem.arch_setup = sdv_arch_setup;
128 x86_platform.i8042_detect = ce4100_i8042_detect;
129 x86_init.resources.probe_roms = x86_init_noop;
130 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
131 x86_init.mpparse.find_smp_config = sdv_find_smp_config;
132}
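
The errata workaround in ce4100_mem_serial_in() can be read in isolation: when the IIR claims "no interrupt pending" but the THR-empty interrupt is enabled and the LSR says the transmitter is in fact empty, the code clears UART_IIR_NO_INT so the 8250 core services the Tx path anyway. A minimal userspace sketch of just that decision, using the standard 8250 bit values:

#include <stdio.h>

#define UART_IIR_NO_INT	0x01	/* IIR: no interrupt pending */
#define UART_IER_THRI	0x02	/* IER: THR-empty interrupt enabled */
#define UART_LSR_THRE	0x20	/* LSR: THR empty */
#define UART_LSR_TEMT	0x40	/* LSR: transmitter empty */

/* decide whether a "no interrupt" IIR read should be overridden */
static unsigned int fixup_iir(unsigned int iir, unsigned int ier,
			      unsigned int lsr)
{
	if ((iir & UART_IIR_NO_INT) && (ier & UART_IER_THRI) &&
	    (lsr & (UART_LSR_THRE | UART_LSR_TEMT)))
		iir &= ~UART_IIR_NO_INT;	/* report a pending Tx irq */
	return iir;
}

int main(void)
{
	/* silicon bug case: no interrupt flagged, but Tx is idle and armed */
	printf("0x%02x\n", fixup_iir(0xc1, UART_IER_THRI, UART_LSR_TEMT));
	return 0;
}
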
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile
new file mode 100644
index 000000000000..db921983a102
--- /dev/null
+++ b/arch/x86/platform/iris/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_X86_32_IRIS) += iris.o
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
new file mode 100644
index 000000000000..1ba7f5ed8c9b
--- /dev/null
+++ b/arch/x86/platform/iris/iris.c
@@ -0,0 +1,91 @@
1/*
2 * Eurobraille/Iris power off support.
3 *
4 * Eurobraille's Iris machine is a PC with no APM or ACPI support.
5 * It is shutdown by a special I/O sequence which this module provides.
6 *
7 * Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
8 *
9 * This program is free software ; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation ; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with the program ; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24#include <linux/moduleparam.h>
25#include <linux/module.h>
26#include <linux/kernel.h>
27#include <linux/errno.h>
28#include <linux/delay.h>
29#include <linux/init.h>
30#include <linux/pm.h>
31#include <asm/io.h>
32
33#define IRIS_GIO_BASE 0x340
34#define IRIS_GIO_INPUT IRIS_GIO_BASE
35#define IRIS_GIO_OUTPUT (IRIS_GIO_BASE + 1)
36#define IRIS_GIO_PULSE 0x80 /* First byte to send */
37#define IRIS_GIO_REST 0x00 /* Second byte to send */
38#define IRIS_GIO_NODEV 0xff /* Likely not an Iris */
39
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
42MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
43MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
44
 45static bool force;
46
47module_param(force, bool, 0);
48MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
49
50static void (*old_pm_power_off)(void);
51
52static void iris_power_off(void)
53{
54 outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT);
55 msleep(850);
56 outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT);
57}
58
59/*
60 * Before installing the power_off handler, try to make sure the OS is
61 * running on an Iris. Since Iris does not support DMI, this is done
62 * by reading its input port and seeing whether the read value is
63 * meaningful.
64 */
65static int iris_init(void)
66{
67 unsigned char status;
68 if (force != 1) {
69 printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
70 return -ENODEV;
71 }
72 status = inb(IRIS_GIO_INPUT);
73 if (status == IRIS_GIO_NODEV) {
74 printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
75 return -ENODEV;
76 }
77 old_pm_power_off = pm_power_off;
78 pm_power_off = &iris_power_off;
79 printk(KERN_INFO "Iris power_off handler installed.\n");
80
81 return 0;
82}
83
84static void iris_exit(void)
85{
86 pm_power_off = old_pm_power_off;
87 printk(KERN_INFO "Iris power_off handler uninstalled.\n");
88}
89
90module_init(iris_init);
91module_exit(iris_exit);
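
iris.c follows the usual pattern for stackable power-off handlers: save the previous pm_power_off pointer at init and restore it at exit, so unloading the module is safe. A userspace sketch of that save/restore discipline; power_off here is a hypothetical stand-in for pm_power_off:

#include <stdio.h>

static void (*power_off)(void);		/* stands in for pm_power_off */
static void (*old_power_off)(void);

static void default_off(void) { puts("generic power off"); }
static void iris_off(void)    { puts("iris I/O sequence"); }

static void install(void)
{
	old_power_off = power_off;	/* remember the previous handler */
	power_off = iris_off;
}

static void uninstall(void)
{
	power_off = old_power_off;	/* put it back on module exit */
}

int main(void)
{
	power_off = default_off;
	install();
	power_off();	/* iris I/O sequence */
	uninstall();
	power_off();	/* generic power off */
	return 0;
}
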
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index efbbc552fa95..f61ccdd49341 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1 +1,3 @@
1obj-$(CONFIG_X86_MRST) += mrst.o 1obj-$(CONFIG_X86_MRST) += mrst.o
2obj-$(CONFIG_X86_MRST) += vrtc.o
3obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
index 65df603622b2..65df603622b2 100644
--- a/arch/x86/kernel/early_printk_mrst.c
+++ b/arch/x86/platform/mrst/early_printk_mrst.c
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 79ae68154e87..fee0b4914e07 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -9,9 +9,19 @@
9 * as published by the Free Software Foundation; version 2 9 * as published by the Free Software Foundation; version 2
10 * of the License. 10 * of the License.
11 */ 11 */
12
13#define pr_fmt(fmt) "mrst: " fmt
14
12#include <linux/init.h> 15#include <linux/init.h>
13#include <linux/kernel.h> 16#include <linux/kernel.h>
14#include <linux/sfi.h> 17#include <linux/sfi.h>
18#include <linux/intel_pmic_gpio.h>
19#include <linux/spi/spi.h>
20#include <linux/i2c.h>
21#include <linux/i2c/pca953x.h>
22#include <linux/gpio_keys.h>
23#include <linux/input.h>
24#include <linux/platform_device.h>
15#include <linux/irq.h> 25#include <linux/irq.h>
16#include <linux/module.h> 26#include <linux/module.h>
17 27
@@ -23,7 +33,9 @@
23#include <asm/mrst.h> 33#include <asm/mrst.h>
24#include <asm/io.h> 34#include <asm/io.h>
25#include <asm/i8259.h> 35#include <asm/i8259.h>
36#include <asm/intel_scu_ipc.h>
26#include <asm/apb_timer.h> 37#include <asm/apb_timer.h>
38#include <asm/reboot.h>
27 39
28/* 40/*
29 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, 41 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
@@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
102 memcpy(sfi_mtimer_array, pentry, totallen); 114 memcpy(sfi_mtimer_array, pentry, totallen);
103 } 115 }
104 116
105 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); 117 pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
106 pentry = sfi_mtimer_array; 118 pentry = sfi_mtimer_array;
107 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { 119 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
108 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," 120 pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
109 " irq = %d\n", totallen, (u32)pentry->phys_addr, 121 " irq = %d\n", totallen, (u32)pentry->phys_addr,
110 pentry->freq_hz, pentry->irq); 122 pentry->freq_hz, pentry->irq);
111 if (!pentry->irq) 123 if (!pentry->irq)
@@ -176,14 +188,14 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
176 memcpy(sfi_mrtc_array, pentry, totallen); 188 memcpy(sfi_mrtc_array, pentry, totallen);
177 } 189 }
178 190
179 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); 191 pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
180 pentry = sfi_mrtc_array; 192 pentry = sfi_mrtc_array;
181 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { 193 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
182 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", 194 pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
183 totallen, (u32)pentry->phys_addr, pentry->irq); 195 totallen, (u32)pentry->phys_addr, pentry->irq);
184 mp_irq.type = MP_IOAPIC; 196 mp_irq.type = MP_IOAPIC;
185 mp_irq.irqtype = mp_INT; 197 mp_irq.irqtype = mp_INT;
186 mp_irq.irqflag = 0; 198 mp_irq.irqflag = 0xf; /* level trigger and active low */
187 mp_irq.srcbus = 0; 199 mp_irq.srcbus = 0;
188 mp_irq.srcbusirq = pentry->irq; /* IRQ */ 200 mp_irq.srcbusirq = pentry->irq; /* IRQ */
189 mp_irq.dstapic = MP_APIC_ALL; 201 mp_irq.dstapic = MP_APIC_ALL;
@@ -209,6 +221,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
209 221
210void __init mrst_time_init(void) 222void __init mrst_time_init(void)
211{ 223{
224 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
212 switch (mrst_timer_options) { 225 switch (mrst_timer_options) {
213 case MRST_TIMER_APBT_ONLY: 226 case MRST_TIMER_APBT_ONLY:
214 break; 227 break;
@@ -224,16 +237,10 @@ void __init mrst_time_init(void)
224 return; 237 return;
225 } 238 }
226 /* we need at least one APB timer */ 239 /* we need at least one APB timer */
227 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
228 pre_init_apic_IRQ0(); 240 pre_init_apic_IRQ0();
229 apbt_time_init(); 241 apbt_time_init();
230} 242}
231 243
232void __init mrst_rtc_init(void)
233{
234 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
235}
236
237void __cpuinit mrst_arch_setup(void) 244void __cpuinit mrst_arch_setup(void)
238{ 245{
239 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) 246 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
@@ -256,6 +263,17 @@ static int mrst_i8042_detect(void)
256 return 0; 263 return 0;
257} 264}
258 265
266/* Reboot and power off are handled by the SCU on a MID device */
267static void mrst_power_off(void)
268{
269 intel_scu_ipc_simple_command(0xf1, 1);
270}
271
272static void mrst_reboot(void)
273{
274 intel_scu_ipc_simple_command(0xf1, 0);
275}
276
259/* 277/*
260 * Moorestown specific x86_init function overrides and early setup 278 * Moorestown specific x86_init function overrides and early setup
261 * calls. 279 * calls.
@@ -281,6 +299,10 @@ void __init x86_mrst_early_setup(void)
281 299
282 legacy_pic = &null_legacy_pic; 300 legacy_pic = &null_legacy_pic;
283 301
302 /* Moorestown specific power_off/restart method */
303 pm_power_off = mrst_power_off;
304 machine_ops.emergency_restart = mrst_reboot;
305
284 /* Avoid searching for BIOS MP tables */ 306 /* Avoid searching for BIOS MP tables */
285 x86_init.mpparse.find_smp_config = x86_init_noop; 307 x86_init.mpparse.find_smp_config = x86_init_noop;
286 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 308 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
@@ -309,3 +331,505 @@ static inline int __init setup_x86_mrst_timer(char *arg)
309 return 0; 331 return 0;
310} 332}
311__setup("x86_mrst_timer=", setup_x86_mrst_timer); 333__setup("x86_mrst_timer=", setup_x86_mrst_timer);
334
335/*
 336 * Parse the GPIO table first, since the DEVS table will need it
 337 * to map pin names to actual pins.
338 */
339static struct sfi_gpio_table_entry *gpio_table;
340static int gpio_num_entry;
341
342static int __init sfi_parse_gpio(struct sfi_table_header *table)
343{
344 struct sfi_table_simple *sb;
345 struct sfi_gpio_table_entry *pentry;
346 int num, i;
347
348 if (gpio_table)
349 return 0;
350 sb = (struct sfi_table_simple *)table;
351 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
352 pentry = (struct sfi_gpio_table_entry *)sb->pentry;
353
354 gpio_table = (struct sfi_gpio_table_entry *)
355 kmalloc(num * sizeof(*pentry), GFP_KERNEL);
356 if (!gpio_table)
357 return -1;
358 memcpy(gpio_table, pentry, num * sizeof(*pentry));
359 gpio_num_entry = num;
360
361 pr_debug("GPIO pin info:\n");
362 for (i = 0; i < num; i++, pentry++)
363 pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
364 " pin = %d\n", i,
365 pentry->controller_name,
366 pentry->pin_name,
367 pentry->pin_no);
368 return 0;
369}
370
371static int get_gpio_by_name(const char *name)
372{
373 struct sfi_gpio_table_entry *pentry = gpio_table;
374 int i;
375
376 if (!pentry)
377 return -1;
378 for (i = 0; i < gpio_num_entry; i++, pentry++) {
379 if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
380 return pentry->pin_no;
381 }
382 return -1;
383}
384
385/*
 386 * This defines the array of device platform data that IAFW exports
 387 * through the SFI "DEVS" table; we use name and type to match a device
 388 * with its platform data.
389 */
390struct devs_id {
391 char name[SFI_NAME_LEN + 1];
392 u8 type;
393 u8 delay;
394 void *(*get_platform_data)(void *info);
395};
396
397/* the offset for the mapping of global gpio pin to irq */
398#define MRST_IRQ_OFFSET 0x100
399
400static void __init *pmic_gpio_platform_data(void *info)
401{
402 static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
403 int gpio_base = get_gpio_by_name("pmic_gpio_base");
404
405 if (gpio_base == -1)
406 gpio_base = 64;
407 pmic_gpio_pdata.gpio_base = gpio_base;
408 pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
409 pmic_gpio_pdata.gpiointr = 0xffffeff8;
410
411 return &pmic_gpio_pdata;
412}
413
414static void __init *max3111_platform_data(void *info)
415{
416 struct spi_board_info *spi_info = info;
417 int intr = get_gpio_by_name("max3111_int");
418
419 if (intr == -1)
420 return NULL;
421 spi_info->irq = intr + MRST_IRQ_OFFSET;
422 return NULL;
423}
424
425/* we have multiple max7315 on the board ... */
426#define MAX7315_NUM 2
427static void __init *max7315_platform_data(void *info)
428{
429 static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
430 static int nr;
431 struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
432 struct i2c_board_info *i2c_info = info;
433 int gpio_base, intr;
434 char base_pin_name[SFI_NAME_LEN + 1];
435 char intr_pin_name[SFI_NAME_LEN + 1];
436
437 if (nr == MAX7315_NUM) {
438 pr_err("too many max7315s, we only support %d\n",
439 MAX7315_NUM);
440 return NULL;
441 }
 442 /* we have several max7315s on the board; we only need to load several
 443 * instances of the same pca953x driver to cover them
444 */
445 strcpy(i2c_info->type, "max7315");
446 if (nr++) {
447 sprintf(base_pin_name, "max7315_%d_base", nr);
448 sprintf(intr_pin_name, "max7315_%d_int", nr);
449 } else {
450 strcpy(base_pin_name, "max7315_base");
451 strcpy(intr_pin_name, "max7315_int");
452 }
453
454 gpio_base = get_gpio_by_name(base_pin_name);
455 intr = get_gpio_by_name(intr_pin_name);
456
457 if (gpio_base == -1)
458 return NULL;
459 max7315->gpio_base = gpio_base;
460 if (intr != -1) {
461 i2c_info->irq = intr + MRST_IRQ_OFFSET;
462 max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
463 } else {
464 i2c_info->irq = -1;
465 max7315->irq_base = -1;
466 }
467 return max7315;
468}
469
470static void __init *emc1403_platform_data(void *info)
471{
472 static short intr2nd_pdata;
473 struct i2c_board_info *i2c_info = info;
474 int intr = get_gpio_by_name("thermal_int");
475 int intr2nd = get_gpio_by_name("thermal_alert");
476
477 if (intr == -1 || intr2nd == -1)
478 return NULL;
479
480 i2c_info->irq = intr + MRST_IRQ_OFFSET;
481 intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
482
483 return &intr2nd_pdata;
484}
485
486static void __init *lis331dl_platform_data(void *info)
487{
488 static short intr2nd_pdata;
489 struct i2c_board_info *i2c_info = info;
490 int intr = get_gpio_by_name("accel_int");
491 int intr2nd = get_gpio_by_name("accel_2");
492
493 if (intr == -1 || intr2nd == -1)
494 return NULL;
495
496 i2c_info->irq = intr + MRST_IRQ_OFFSET;
497 intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
498
499 return &intr2nd_pdata;
500}
501
502static void __init *no_platform_data(void *info)
503{
504 return NULL;
505}
506
507static const struct devs_id __initconst device_ids[] = {
508 {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
509 {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
510 {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
511 {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
512 {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
513 {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
514 {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
515 {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
516 {},
517};
518
519#define MAX_IPCDEVS 24
520static struct platform_device *ipc_devs[MAX_IPCDEVS];
521static int ipc_next_dev;
522
523#define MAX_SCU_SPI 24
524static struct spi_board_info *spi_devs[MAX_SCU_SPI];
525static int spi_next_dev;
526
527#define MAX_SCU_I2C 24
528static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
529static int i2c_bus[MAX_SCU_I2C];
530static int i2c_next_dev;
531
532static void __init intel_scu_device_register(struct platform_device *pdev)
533{
 534 if (ipc_next_dev == MAX_IPCDEVS)
 535 pr_err("too many SCU IPC devices\n");
536 else
537 ipc_devs[ipc_next_dev++] = pdev;
538}
539
540static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
541{
542 struct spi_board_info *new_dev;
543
544 if (spi_next_dev == MAX_SCU_SPI) {
545 pr_err("too many SCU SPI devices");
546 return;
547 }
548
549 new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
550 if (!new_dev) {
551 pr_err("failed to alloc mem for delayed spi dev %s\n",
552 sdev->modalias);
553 return;
554 }
555 memcpy(new_dev, sdev, sizeof(*sdev));
556
557 spi_devs[spi_next_dev++] = new_dev;
558}
559
560static void __init intel_scu_i2c_device_register(int bus,
561 struct i2c_board_info *idev)
562{
563 struct i2c_board_info *new_dev;
564
565 if (i2c_next_dev == MAX_SCU_I2C) {
566 pr_err("too many SCU I2C devices");
567 return;
568 }
569
570 new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
571 if (!new_dev) {
572 pr_err("failed to alloc mem for delayed i2c dev %s\n",
573 idev->type);
574 return;
575 }
576 memcpy(new_dev, idev, sizeof(*idev));
577
578 i2c_bus[i2c_next_dev] = bus;
579 i2c_devs[i2c_next_dev++] = new_dev;
580}
581
582/* Called by IPC driver */
583void intel_scu_devices_create(void)
584{
585 int i;
586
587 for (i = 0; i < ipc_next_dev; i++)
588 platform_device_add(ipc_devs[i]);
589
590 for (i = 0; i < spi_next_dev; i++)
591 spi_register_board_info(spi_devs[i], 1);
592
593 for (i = 0; i < i2c_next_dev; i++) {
594 struct i2c_adapter *adapter;
595 struct i2c_client *client;
596
597 adapter = i2c_get_adapter(i2c_bus[i]);
598 if (adapter) {
599 client = i2c_new_device(adapter, i2c_devs[i]);
600 if (!client)
601 pr_err("can't create i2c device %s\n",
602 i2c_devs[i]->type);
603 } else
604 i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
605 }
606}
607EXPORT_SYMBOL_GPL(intel_scu_devices_create);
608
609/* Called by IPC driver */
610void intel_scu_devices_destroy(void)
611{
612 int i;
613
614 for (i = 0; i < ipc_next_dev; i++)
615 platform_device_del(ipc_devs[i]);
616}
617EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
618
619static void __init install_irq_resource(struct platform_device *pdev, int irq)
620{
621 /* Single threaded */
622 static struct resource __initdata res = {
623 .name = "IRQ",
624 .flags = IORESOURCE_IRQ,
625 };
626 res.start = irq;
627 platform_device_add_resources(pdev, &res, 1);
628}
629
630static void __init sfi_handle_ipc_dev(struct platform_device *pdev)
631{
632 const struct devs_id *dev = device_ids;
633 void *pdata = NULL;
634
635 while (dev->name[0]) {
636 if (dev->type == SFI_DEV_TYPE_IPC &&
637 !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) {
638 pdata = dev->get_platform_data(pdev);
639 break;
640 }
641 dev++;
642 }
643 pdev->dev.platform_data = pdata;
644 intel_scu_device_register(pdev);
645}
646
647static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
648{
649 const struct devs_id *dev = device_ids;
650 void *pdata = NULL;
651
652 while (dev->name[0]) {
653 if (dev->type == SFI_DEV_TYPE_SPI &&
654 !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
655 pdata = dev->get_platform_data(spi_info);
656 break;
657 }
658 dev++;
659 }
660 spi_info->platform_data = pdata;
661 if (dev->delay)
662 intel_scu_spi_device_register(spi_info);
663 else
664 spi_register_board_info(spi_info, 1);
665}
666
667static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
668{
669 const struct devs_id *dev = device_ids;
670 void *pdata = NULL;
671
672 while (dev->name[0]) {
673 if (dev->type == SFI_DEV_TYPE_I2C &&
674 !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
675 pdata = dev->get_platform_data(i2c_info);
676 break;
677 }
678 dev++;
679 }
680 i2c_info->platform_data = pdata;
681
682 if (dev->delay)
683 intel_scu_i2c_device_register(bus, i2c_info);
684 else
685 i2c_register_board_info(bus, i2c_info, 1);
686 }
687
688
689static int __init sfi_parse_devs(struct sfi_table_header *table)
690{
691 struct sfi_table_simple *sb;
692 struct sfi_device_table_entry *pentry;
693 struct spi_board_info spi_info;
694 struct i2c_board_info i2c_info;
695 struct platform_device *pdev;
696 int num, i, bus;
697 int ioapic;
698 struct io_apic_irq_attr irq_attr;
699
700 sb = (struct sfi_table_simple *)table;
701 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
702 pentry = (struct sfi_device_table_entry *)sb->pentry;
703
704 for (i = 0; i < num; i++, pentry++) {
705 if (pentry->irq != (u8)0xff) { /* native RTE case */
 706 /* these SPI2 devices are not exposed to the system as PCI
 707 * devices, but they have a separate RTE entry in the IOAPIC,
 708 * so we have to enable them one by one here
709 */
710 ioapic = mp_find_ioapic(pentry->irq);
711 irq_attr.ioapic = ioapic;
712 irq_attr.ioapic_pin = pentry->irq;
713 irq_attr.trigger = 1;
714 irq_attr.polarity = 1;
715 io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
716 }
717 switch (pentry->type) {
718 case SFI_DEV_TYPE_IPC:
719 /* ID as IRQ is a hack that will go away */
720 pdev = platform_device_alloc(pentry->name, pentry->irq);
721 if (pdev == NULL) {
722 pr_err("out of memory for SFI platform device '%s'.\n",
723 pentry->name);
724 continue;
725 }
726 install_irq_resource(pdev, pentry->irq);
727 pr_debug("info[%2d]: IPC bus, name = %16.16s, "
728 "irq = 0x%2x\n", i, pentry->name, pentry->irq);
729 sfi_handle_ipc_dev(pdev);
730 break;
731 case SFI_DEV_TYPE_SPI:
732 memset(&spi_info, 0, sizeof(spi_info));
733 strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
734 spi_info.irq = pentry->irq;
735 spi_info.bus_num = pentry->host_num;
736 spi_info.chip_select = pentry->addr;
737 spi_info.max_speed_hz = pentry->max_freq;
738 pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
739 "irq = 0x%2x, max_freq = %d, cs = %d\n", i,
740 spi_info.bus_num,
741 spi_info.modalias,
742 spi_info.irq,
743 spi_info.max_speed_hz,
744 spi_info.chip_select);
745 sfi_handle_spi_dev(&spi_info);
746 break;
747 case SFI_DEV_TYPE_I2C:
748 memset(&i2c_info, 0, sizeof(i2c_info));
749 bus = pentry->host_num;
750 strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
751 i2c_info.irq = pentry->irq;
752 i2c_info.addr = pentry->addr;
753 pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
754 "irq = 0x%2x, addr = 0x%x\n", i, bus,
755 i2c_info.type,
756 i2c_info.irq,
757 i2c_info.addr);
758 sfi_handle_i2c_dev(bus, &i2c_info);
759 break;
760 case SFI_DEV_TYPE_UART:
761 case SFI_DEV_TYPE_HSI:
762 default:
763 ;
764 }
765 }
766 return 0;
767}
768
769static int __init mrst_platform_init(void)
770{
771 sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
772 sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
773 return 0;
774}
775arch_initcall(mrst_platform_init);
776
777/*
 778 * We will search for these buttons in the SFI GPIO table (by name)
 779 * and register them dynamically. Please add all possible
 780 * buttons here; we will drop those for which no GPIO is found.
781 */
782static struct gpio_keys_button gpio_button[] = {
783 {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000},
784 {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20},
785 {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20},
786 {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20},
787 {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20},
788 {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20},
789 {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20},
790 {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20},
791 {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20},
792 {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20},
793};
794
795static struct gpio_keys_platform_data mrst_gpio_keys = {
796 .buttons = gpio_button,
797 .rep = 1,
798 .nbuttons = -1, /* will fill it after search */
799};
800
801static struct platform_device pb_device = {
802 .name = "gpio-keys",
803 .id = -1,
804 .dev = {
805 .platform_data = &mrst_gpio_keys,
806 },
807};
808
809/*
 810 * Drop the non-existent buttons and register the gpio button
 811 * device if any remain.
812 */
813static int __init pb_keys_init(void)
814{
815 struct gpio_keys_button *gb = gpio_button;
816 int i, num, good = 0;
817
 818 num = ARRAY_SIZE(gpio_button);
819 for (i = 0; i < num; i++) {
820 gb[i].gpio = get_gpio_by_name(gb[i].desc);
821 if (gb[i].gpio == -1)
822 continue;
823
824 if (i != good)
825 gb[good] = gb[i];
826 good++;
827 }
828
829 if (good) {
830 mrst_gpio_keys.nbuttons = good;
831 return platform_device_register(&pb_device);
832 }
833 return 0;
834}
835late_initcall(pb_keys_init);
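
pb_keys_init() above compacts the button table in place: entries whose GPIO lookup fails are skipped, surviving entries are slid down to index good, and only the compacted prefix is registered. The same two-index idiom in a standalone userspace sketch; the lookup table inside get_gpio_by_name() is hypothetical:

#include <stdio.h>
#include <string.h>

struct button { const char *desc; int gpio; };

/* hypothetical SFI lookup: only some names resolve to a pin */
static int get_gpio_by_name(const char *name)
{
	if (!strcmp(name, "power_btn"))  return 3;
	if (!strcmp(name, "vol_up"))     return 7;
	return -1;
}

int main(void)
{
	struct button gb[] = {
		{ "power_btn", -1 }, { "prog_btn1", -1 }, { "vol_up", -1 },
	};
	int i, good = 0;
	int num = sizeof(gb) / sizeof(gb[0]);

	for (i = 0; i < num; i++) {
		gb[i].gpio = get_gpio_by_name(gb[i].desc);
		if (gb[i].gpio == -1)
			continue;		/* no GPIO: drop this button */
		if (i != good)
			gb[good] = gb[i];	/* slide survivor down */
		good++;
	}
	for (i = 0; i < good; i++)
		printf("%s -> gpio %d\n", gb[i].desc, gb[i].gpio);
	return 0;
}
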
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
new file mode 100644
index 000000000000..32cd7edd71a0
--- /dev/null
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -0,0 +1,165 @@
1/*
2 * vrtc.c: Driver for virtual RTC device on Intel MID platform
3 *
4 * (C) Copyright 2009 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 *
11 * Note:
12 * VRTC is emulated by system controller firmware, the real HW
13 * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
14 * in a memory mapped IO space that is visible to the host IA
15 * processor.
16 *
17 * This driver is based on RTC CMOS driver.
18 */
19
20#include <linux/kernel.h>
21#include <linux/init.h>
22#include <linux/sfi.h>
23#include <linux/platform_device.h>
24
25#include <asm/mrst.h>
26#include <asm/mrst-vrtc.h>
27#include <asm/time.h>
28#include <asm/fixmap.h>
29
30static unsigned char __iomem *vrtc_virt_base;
31
32unsigned char vrtc_cmos_read(unsigned char reg)
33{
34 unsigned char retval;
35
36 /* vRTC's registers range from 0x0 to 0xD */
37 if (reg > 0xd || !vrtc_virt_base)
38 return 0xff;
39
40 lock_cmos_prefix(reg);
41 retval = __raw_readb(vrtc_virt_base + (reg << 2));
42 lock_cmos_suffix(reg);
43 return retval;
44}
45EXPORT_SYMBOL_GPL(vrtc_cmos_read);
46
47void vrtc_cmos_write(unsigned char val, unsigned char reg)
48{
49 if (reg > 0xd || !vrtc_virt_base)
50 return;
51
52 lock_cmos_prefix(reg);
53 __raw_writeb(val, vrtc_virt_base + (reg << 2));
54 lock_cmos_suffix(reg);
55}
56EXPORT_SYMBOL_GPL(vrtc_cmos_write);
57
58unsigned long vrtc_get_time(void)
59{
60 u8 sec, min, hour, mday, mon;
61 u32 year;
62
63 while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
64 cpu_relax();
65
66 sec = vrtc_cmos_read(RTC_SECONDS);
67 min = vrtc_cmos_read(RTC_MINUTES);
68 hour = vrtc_cmos_read(RTC_HOURS);
69 mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
70 mon = vrtc_cmos_read(RTC_MONTH);
71 year = vrtc_cmos_read(RTC_YEAR);
72
 73 /* vRTC YEAR reg contains the offset from 1960 */
74 year += 1960;
75
76 printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
77 "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
78
79 return mktime(year, mon, mday, hour, min, sec);
80}
81
82/* Only care about the minutes and seconds */
83int vrtc_set_mmss(unsigned long nowtime)
84{
85 int real_sec, real_min;
86 int vrtc_min;
87
88 vrtc_min = vrtc_cmos_read(RTC_MINUTES);
89
90 real_sec = nowtime % 60;
91 real_min = nowtime / 60;
92 if (((abs(real_min - vrtc_min) + 15)/30) & 1)
93 real_min += 30;
94 real_min %= 60;
95
96 vrtc_cmos_write(real_sec, RTC_SECONDS);
97 vrtc_cmos_write(real_min, RTC_MINUTES);
98 return 0;
99}
100
101void __init mrst_rtc_init(void)
102{
103 unsigned long rtc_paddr;
104 void __iomem *virt_base;
105
106 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
107 if (!sfi_mrtc_num)
108 return;
109
110 rtc_paddr = sfi_mrtc_array[0].phys_addr;
111
112 /* vRTC's register address may not be page aligned */
113 set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
114
115 virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
116 virt_base += rtc_paddr & ~PAGE_MASK;
117 vrtc_virt_base = virt_base;
118
119 x86_platform.get_wallclock = vrtc_get_time;
120 x86_platform.set_wallclock = vrtc_set_mmss;
121}
122
123/*
124 * The Moorestown platform has a memory mapped virtual RTC device that emulates
125 * the programming interface of the RTC.
126 */
127
128static struct resource vrtc_resources[] = {
129 [0] = {
130 .flags = IORESOURCE_MEM,
131 },
132 [1] = {
133 .flags = IORESOURCE_IRQ,
134 }
135};
136
137static struct platform_device vrtc_device = {
138 .name = "rtc_mrst",
139 .id = -1,
140 .resource = vrtc_resources,
141 .num_resources = ARRAY_SIZE(vrtc_resources),
142};
143
144/* Register the RTC device if appropriate */
145static int __init mrst_device_create(void)
146{
147 /* No Moorestown, no device */
148 if (!mrst_identify_cpu())
149 return -ENODEV;
150 /* No timer, no device */
151 if (!sfi_mrtc_num)
152 return -ENODEV;
153
154 /* iomem resource */
155 vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
156 vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
157 MRST_VRTC_MAP_SZ;
158 /* irq resource */
159 vrtc_resources[1].start = sfi_mrtc_array[0].irq;
160 vrtc_resources[1].end = sfi_mrtc_array[0].irq;
161
162 return platform_device_register(&vrtc_device);
163}
164
165module_init(mrst_device_create);
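
The minute-update logic in vrtc_set_mmss() mirrors the classic CMOS set_rtc_mmss() trick: because only minutes and seconds are written, a target minute that sits roughly half an hour away (modulo one hour) from what the RTC currently shows is bumped by 30 so the untouched hour register stays consistent. A worked userspace sketch of just that rounding:

#include <stdio.h>
#include <stdlib.h>

/* compute the minute value to write, given the RTC's current minute */
static int adjust_min(int real_min, int vrtc_min)
{
	/* differ by ~30 min (mod 60)? then shift by 30 to keep hours sane */
	if (((abs(real_min - vrtc_min) + 15) / 30) & 1)
		real_min += 30;
	return real_min % 60;
}

int main(void)
{
	printf("%d\n", adjust_min(10, 12));	/* close: stays 10 */
	printf("%d\n", adjust_min(40, 12));	/* ~30 off: becomes 10 */
	printf("%d\n", adjust_min(5, 58));	/* 7 min apart across the hour: stays 5 */
	return 0;
}
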
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index dd4c281ffe57..ca54875ac795 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -48,9 +48,9 @@ static void __init mp_sfi_register_lapic_address(unsigned long address)
48/* All CPUs enumerated by SFI must be present and enabled */ 48/* All CPUs enumerated by SFI must be present and enabled */
49static void __cpuinit mp_sfi_register_lapic(u8 id) 49static void __cpuinit mp_sfi_register_lapic(u8 id)
50{ 50{
51 if (MAX_APICS - id <= 0) { 51 if (MAX_LOCAL_APIC - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n", 52 pr_warning("Processor #%d invalid (max %d)\n",
53 id, MAX_APICS); 53 id, MAX_LOCAL_APIC);
54 return; 54 return;
55 } 55 }
56 56
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index 3371bd053b89..632037671746 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
171 ver = m->apicver; 171 ver = m->apicver;
172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { 172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", 173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
174 m->apicid, MAX_APICS); 174 m->apicid, MAX_LOCAL_APIC);
175 return; 175 return;
176 } 176 }
177 177
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 2c7def95f721..4c8dea513b66 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -408,6 +408,9 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
408 return_ACPI_STATUS(AE_OK); 408 return_ACPI_STATUS(AE_OK);
409 } 409 }
410 410
411 /* Disable the GPE in case it's been enabled already. */
412 (void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
413
411 /* 414 /*
412 * Add the GPE information from above to the gpe_event_info block for 415 * Add the GPE information from above to the gpe_event_info block for
413 * use during dispatch of this GPE. 416 * use during dispatch of this GPE.
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index 660a2728908d..0cac7ec0d2ec 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
577 * as possible (without an NMI being received in the middle of 577 * as possible (without an NMI being received in the middle of
578 * this) - so disable NMIs and initialize the device: 578 * this) - so disable NMIs and initialize the device:
579 */ 579 */
580 acpi_nmi_disable();
581 status = acpi_ns_evaluate(info); 580 status = acpi_ns_evaluate(info);
582 acpi_nmi_enable();
583 581
584 if (ACPI_SUCCESS(status)) { 582 if (ACPI_SUCCESS(status)) {
585 walk_info->num_INI++; 583 walk_info->num_INI++;
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 9fb9d5ac939d..95649d373071 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -130,8 +130,6 @@ struct acpi_battery {
130 unsigned long flags; 130 unsigned long flags;
131}; 131};
132 132
133static int acpi_battery_update(struct acpi_battery *battery);
134
135#define to_acpi_battery(x) container_of(x, struct acpi_battery, bat); 133#define to_acpi_battery(x) container_of(x, struct acpi_battery, bat);
136 134
137inline int acpi_battery_present(struct acpi_battery *battery) 135inline int acpi_battery_present(struct acpi_battery *battery)
@@ -186,9 +184,6 @@ static int acpi_battery_get_property(struct power_supply *psy,
186 int ret = 0; 184 int ret = 0;
187 struct acpi_battery *battery = to_acpi_battery(psy); 185 struct acpi_battery *battery = to_acpi_battery(psy);
188 186
189 if (acpi_battery_update(battery))
190 return -ENODEV;
191
192 if (acpi_battery_present(battery)) { 187 if (acpi_battery_present(battery)) {
193 /* run battery update only if it is present */ 188 /* run battery update only if it is present */
194 acpi_battery_get_state(battery); 189 acpi_battery_get_state(battery);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 5718566e00f9..d9926afec110 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -275,13 +275,23 @@ acpi_table_parse_srat(enum acpi_srat_type id,
275int __init acpi_numa_init(void) 275int __init acpi_numa_init(void)
276{ 276{
277 int ret = 0; 277 int ret = 0;
278 int nr_cpu_entries = nr_cpu_ids;
279
280#ifdef CONFIG_X86
281 /*
282 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
283 * SRAT cpu entries could have different order with that in MADT.
284 * So go over all cpu entries in SRAT to get apicid to node mapping.
285 */
286 nr_cpu_entries = MAX_LOCAL_APIC;
287#endif
278 288
279 /* SRAT: Static Resource Affinity Table */ 289 /* SRAT: Static Resource Affinity Table */
280 if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { 290 if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
281 acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, 291 acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
282 acpi_parse_x2apic_affinity, nr_cpu_ids); 292 acpi_parse_x2apic_affinity, nr_cpu_entries);
283 acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, 293 acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
284 acpi_parse_processor_affinity, nr_cpu_ids); 294 acpi_parse_processor_affinity, nr_cpu_entries);
285 ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, 295 ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
286 acpi_parse_memory_affinity, 296 acpi_parse_memory_affinity,
287 NR_NODE_MEMBLKS); 297 NR_NODE_MEMBLKS);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 2b6c21d86b98..29ef505c487b 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -705,54 +705,85 @@ static int acpi_bus_get_perf_flags(struct acpi_device *device)
705} 705}
706 706
707static acpi_status 707static acpi_status
708acpi_bus_extract_wakeup_device_power_package(struct acpi_device *device, 708acpi_bus_extract_wakeup_device_power_package(acpi_handle handle,
709 union acpi_object *package) 709 struct acpi_device_wakeup *wakeup)
710{ 710{
711 int i = 0; 711 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
712 union acpi_object *package = NULL;
712 union acpi_object *element = NULL; 713 union acpi_object *element = NULL;
714 acpi_status status;
715 int i = 0;
713 716
714 if (!device || !package || (package->package.count < 2)) 717 if (!wakeup)
715 return AE_BAD_PARAMETER; 718 return AE_BAD_PARAMETER;
716 719
720 /* _PRW */
721 status = acpi_evaluate_object(handle, "_PRW", NULL, &buffer);
722 if (ACPI_FAILURE(status)) {
723 ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PRW"));
724 return status;
725 }
726
727 package = (union acpi_object *)buffer.pointer;
728
729 if (!package || (package->package.count < 2)) {
730 status = AE_BAD_DATA;
731 goto out;
732 }
733
717 element = &(package->package.elements[0]); 734 element = &(package->package.elements[0]);
718 if (!element) 735 if (!element) {
719 return AE_BAD_PARAMETER; 736 status = AE_BAD_DATA;
737 goto out;
738 }
720 if (element->type == ACPI_TYPE_PACKAGE) { 739 if (element->type == ACPI_TYPE_PACKAGE) {
721 if ((element->package.count < 2) || 740 if ((element->package.count < 2) ||
722 (element->package.elements[0].type != 741 (element->package.elements[0].type !=
723 ACPI_TYPE_LOCAL_REFERENCE) 742 ACPI_TYPE_LOCAL_REFERENCE)
724 || (element->package.elements[1].type != ACPI_TYPE_INTEGER)) 743 || (element->package.elements[1].type != ACPI_TYPE_INTEGER)) {
725 return AE_BAD_DATA; 744 status = AE_BAD_DATA;
726 device->wakeup.gpe_device = 745 goto out;
746 }
747 wakeup->gpe_device =
727 element->package.elements[0].reference.handle; 748 element->package.elements[0].reference.handle;
728 device->wakeup.gpe_number = 749 wakeup->gpe_number =
729 (u32) element->package.elements[1].integer.value; 750 (u32) element->package.elements[1].integer.value;
730 } else if (element->type == ACPI_TYPE_INTEGER) { 751 } else if (element->type == ACPI_TYPE_INTEGER) {
731 device->wakeup.gpe_number = element->integer.value; 752 wakeup->gpe_device = NULL;
732 } else 753 wakeup->gpe_number = element->integer.value;
733 return AE_BAD_DATA; 754 } else {
755 status = AE_BAD_DATA;
756 goto out;
757 }
734 758
735 element = &(package->package.elements[1]); 759 element = &(package->package.elements[1]);
736 if (element->type != ACPI_TYPE_INTEGER) { 760 if (element->type != ACPI_TYPE_INTEGER) {
737 return AE_BAD_DATA; 761 status = AE_BAD_DATA;
762 goto out;
738 } 763 }
739 device->wakeup.sleep_state = element->integer.value; 764 wakeup->sleep_state = element->integer.value;
740 765
741 if ((package->package.count - 2) > ACPI_MAX_HANDLES) { 766 if ((package->package.count - 2) > ACPI_MAX_HANDLES) {
742 return AE_NO_MEMORY; 767 status = AE_NO_MEMORY;
768 goto out;
743 } 769 }
744 device->wakeup.resources.count = package->package.count - 2; 770 wakeup->resources.count = package->package.count - 2;
745 for (i = 0; i < device->wakeup.resources.count; i++) { 771 for (i = 0; i < wakeup->resources.count; i++) {
746 element = &(package->package.elements[i + 2]); 772 element = &(package->package.elements[i + 2]);
747 if (element->type != ACPI_TYPE_LOCAL_REFERENCE) 773 if (element->type != ACPI_TYPE_LOCAL_REFERENCE) {
748 return AE_BAD_DATA; 774 status = AE_BAD_DATA;
775 goto out;
776 }
749 777
750 device->wakeup.resources.handles[i] = element->reference.handle; 778 wakeup->resources.handles[i] = element->reference.handle;
751 } 779 }
752 780
753 acpi_gpe_can_wake(device->wakeup.gpe_device, device->wakeup.gpe_number); 781 acpi_gpe_can_wake(wakeup->gpe_device, wakeup->gpe_number);
754 782
755 return AE_OK; 783 out:
784 kfree(buffer.pointer);
785
786 return status;
756} 787}
757 788
758static void acpi_bus_set_run_wake_flags(struct acpi_device *device) 789static void acpi_bus_set_run_wake_flags(struct acpi_device *device)
@@ -787,26 +818,15 @@ static void acpi_bus_set_run_wake_flags(struct acpi_device *device)
787static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device) 818static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
788{ 819{
789 acpi_status status = 0; 820 acpi_status status = 0;
790 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
791 union acpi_object *package = NULL;
792 int psw_error; 821 int psw_error;
793 822
794 /* _PRW */ 823 status = acpi_bus_extract_wakeup_device_power_package(device->handle,
795 status = acpi_evaluate_object(device->handle, "_PRW", NULL, &buffer); 824 &device->wakeup);
796 if (ACPI_FAILURE(status)) {
797 ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PRW"));
798 goto end;
799 }
800
801 package = (union acpi_object *)buffer.pointer;
802 status = acpi_bus_extract_wakeup_device_power_package(device, package);
803 if (ACPI_FAILURE(status)) { 825 if (ACPI_FAILURE(status)) {
804 ACPI_EXCEPTION((AE_INFO, status, "Extracting _PRW package")); 826 ACPI_EXCEPTION((AE_INFO, status, "Extracting _PRW package"));
805 goto end; 827 goto end;
806 } 828 }
807 829
808 kfree(buffer.pointer);
809
810 device->wakeup.flags.valid = 1; 830 device->wakeup.flags.valid = 1;
811 device->wakeup.prepare_count = 0; 831 device->wakeup.prepare_count = 0;
812 acpi_bus_set_run_wake_flags(device); 832 acpi_bus_set_run_wake_flags(device);
@@ -1351,6 +1371,7 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl,
1351 struct acpi_bus_ops *ops = context; 1371 struct acpi_bus_ops *ops = context;
1352 int type; 1372 int type;
1353 unsigned long long sta; 1373 unsigned long long sta;
1374 struct acpi_device_wakeup wakeup;
1354 struct acpi_device *device; 1375 struct acpi_device *device;
1355 acpi_status status; 1376 acpi_status status;
1356 int result; 1377 int result;
@@ -1360,8 +1381,10 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl,
1360 return AE_OK; 1381 return AE_OK;
1361 1382
1362 if (!(sta & ACPI_STA_DEVICE_PRESENT) && 1383 if (!(sta & ACPI_STA_DEVICE_PRESENT) &&
1363 !(sta & ACPI_STA_DEVICE_FUNCTIONING)) 1384 !(sta & ACPI_STA_DEVICE_FUNCTIONING)) {
1385 acpi_bus_extract_wakeup_device_power_package(handle, &wakeup);
1364 return AE_CTRL_DEPTH; 1386 return AE_CTRL_DEPTH;
1387 }
1365 1388
1366 /* 1389 /*
1367 * We may already have an acpi_device from a previous enumeration. If 1390 * We may already have an acpi_device from a previous enumeration. If
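The refactor above moves the _PRW evaluation into acpi_bus_extract_wakeup_device_power_package() itself, so the helper owns the ACPI buffer and every failure after allocation funnels through one release point. A minimal sketch of that acquire/validate/release shape, using hypothetical stand-in helpers rather than the real ACPICA calls:

#include <stdlib.h>

struct buffer { void *pointer; };

/* Hypothetical stand-ins for the evaluate and parse steps. */
extern int evaluate(struct buffer *buf);  /* allocates buf->pointer on success */
extern int parse(const struct buffer *buf);

static int extract(void)
{
	struct buffer buf = { NULL };
	int status;

	status = evaluate(&buf);
	if (status)
		return status;       /* nothing allocated yet: plain return */

	if (!buf.pointer) {
		status = -1;
		goto out;            /* every later failure shares this exit */
	}
	status = parse(&buf);
out:
	free(buf.pointer);           /* the single release point */
	return status;
}

Once evaluate() has handed back a buffer, goto out is the only way out, which is what lets acpi_bus_check_add() call the helper for non-present devices without leaking.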
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 11ec911016c6..36e2319264bd 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -128,16 +128,6 @@ config PDC_ADMA
128 128
129 If unsure, say N. 129 If unsure, say N.
130 130
131config PATA_MPC52xx
132 tristate "Freescale MPC52xx SoC internal IDE"
133 depends on PPC_MPC52xx && PPC_BESTCOMM
134 select PPC_BESTCOMM_ATA
135 help
136 This option enables support for integrated IDE controller
137 of the Freescale MPC52xx SoC.
138
139 If unsure, say N.
140
141config PATA_OCTEON_CF 131config PATA_OCTEON_CF
142 tristate "OCTEON Boot Bus Compact Flash support" 132 tristate "OCTEON Boot Bus Compact Flash support"
143 depends on CPU_CAVIUM_OCTEON 133 depends on CPU_CAVIUM_OCTEON
@@ -366,7 +356,7 @@ config PATA_CS5535
366 356
367config PATA_CS5536 357config PATA_CS5536
368 tristate "CS5536 PATA support" 358 tristate "CS5536 PATA support"
369 depends on PCI && X86 && !X86_64 359 depends on PCI
370 help 360 help
371 This option enables support for the AMD CS5536 361 This option enables support for the AMD CS5536
372 companion chip used with the Geode LX processor family. 362 companion chip used with the Geode LX processor family.
@@ -491,6 +481,16 @@ config PATA_MARVELL
491 481
492 If unsure, say N. 482 If unsure, say N.
493 483
484config PATA_MPC52xx
485 tristate "Freescale MPC52xx SoC internal IDE"
486 depends on PPC_MPC52xx && PPC_BESTCOMM
487 select PPC_BESTCOMM_ATA
488 help
489 This option enables support for integrated IDE controller
490 of the Freescale MPC52xx SoC.
491
492 If unsure, say N.
493
494config PATA_NETCELL 494config PATA_NETCELL
495 tristate "NETCELL Revolution RAID support" 495 tristate "NETCELL Revolution RAID support"
496 depends on PCI 496 depends on PCI
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index c501af5b12b9..2b67c900a459 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o
11 11
12# SFF w/ custom DMA 12# SFF w/ custom DMA
13obj-$(CONFIG_PDC_ADMA) += pdc_adma.o 13obj-$(CONFIG_PDC_ADMA) += pdc_adma.o
14obj-$(CONFIG_PATA_MPC52xx) += pata_mpc52xx.o
15obj-$(CONFIG_PATA_OCTEON_CF) += pata_octeon_cf.o 14obj-$(CONFIG_PATA_OCTEON_CF) += pata_octeon_cf.o
16obj-$(CONFIG_SATA_QSTOR) += sata_qstor.o 15obj-$(CONFIG_SATA_QSTOR) += sata_qstor.o
17obj-$(CONFIG_SATA_SX4) += sata_sx4.o 16obj-$(CONFIG_SATA_SX4) += sata_sx4.o
@@ -52,6 +51,7 @@ obj-$(CONFIG_PATA_IT821X) += pata_it821x.o
52obj-$(CONFIG_PATA_JMICRON) += pata_jmicron.o 51obj-$(CONFIG_PATA_JMICRON) += pata_jmicron.o
53obj-$(CONFIG_PATA_MACIO) += pata_macio.o 52obj-$(CONFIG_PATA_MACIO) += pata_macio.o
54obj-$(CONFIG_PATA_MARVELL) += pata_marvell.o 53obj-$(CONFIG_PATA_MARVELL) += pata_marvell.o
54obj-$(CONFIG_PATA_MPC52xx) += pata_mpc52xx.o
55obj-$(CONFIG_PATA_NETCELL) += pata_netcell.o 55obj-$(CONFIG_PATA_NETCELL) += pata_netcell.o
56obj-$(CONFIG_PATA_NINJA32) += pata_ninja32.o 56obj-$(CONFIG_PATA_NINJA32) += pata_ninja32.o
57obj-$(CONFIG_PATA_NS87415) += pata_ns87415.o 57obj-$(CONFIG_PATA_NS87415) += pata_ns87415.o
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 7f77c67d267c..f23d6d46b95b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4807,9 +4807,6 @@ static void ata_verify_xfer(struct ata_queued_cmd *qc)
4807{ 4807{
4808 struct ata_device *dev = qc->dev; 4808 struct ata_device *dev = qc->dev;
4809 4809
4810 if (ata_tag_internal(qc->tag))
4811 return;
4812
4813 if (ata_is_nodata(qc->tf.protocol)) 4810 if (ata_is_nodata(qc->tf.protocol))
4814 return; 4811 return;
4815 4812
@@ -4858,14 +4855,23 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
4858 if (unlikely(qc->err_mask)) 4855 if (unlikely(qc->err_mask))
4859 qc->flags |= ATA_QCFLAG_FAILED; 4856 qc->flags |= ATA_QCFLAG_FAILED;
4860 4857
4861 if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) { 4858 /*
4862 /* always fill result TF for failed qc */ 4859 * Finish internal commands without any further processing
4860 * and always with the result TF filled.
4861 */
4862 if (unlikely(ata_tag_internal(qc->tag))) {
4863 fill_result_tf(qc); 4863 fill_result_tf(qc);
4864 __ata_qc_complete(qc);
4865 return;
4866 }
4864 4867
4865 if (!ata_tag_internal(qc->tag)) 4868 /*
4866 ata_qc_schedule_eh(qc); 4869 * Non-internal qc has failed. Fill the result TF and
4867 else 4870 * summon EH.
4868 __ata_qc_complete(qc); 4871 */
4872 if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) {
4873 fill_result_tf(qc);
4874 ata_qc_schedule_eh(qc);
4869 return; 4875 return;
4870 } 4876 }
4871 4877
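The ata_qc_complete() reordering gives internal commands an unconditional fast path: they always get a result TF and never enter EH, while failed normal commands now also get the TF filled before EH is summoned. Roughly, with stub helpers standing in for the libata internals:

#include <stdbool.h>

struct qc { bool internal; bool failed; };

/* Stand-ins for fill_result_tf(), __ata_qc_complete(), ata_qc_schedule_eh(). */
extern void fill_result_tf(struct qc *qc);
extern void complete_qc(struct qc *qc);
extern void schedule_eh(struct qc *qc);

static void qc_complete(struct qc *qc)
{
	/* 1. Internal commands: result TF always filled, EH never involved. */
	if (qc->internal) {
		fill_result_tf(qc);
		complete_qc(qc);
		return;
	}

	/* 2. Failed normal commands: fill the TF, then summon EH. */
	if (qc->failed) {
		fill_result_tf(qc);
		schedule_eh(qc);
		return;
	}

	/* 3. Successful normal completion proceeds as before. */
	complete_qc(qc);
}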
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 5e590504f3aa..17a637877d03 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -3275,6 +3275,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
3275 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; 3275 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
3276 struct ata_eh_context *ehc = &link->eh_context; 3276 struct ata_eh_context *ehc = &link->eh_context;
3277 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; 3277 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
3278 enum ata_lpm_policy old_policy = link->lpm_policy;
3278 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; 3279 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
3279 unsigned int err_mask; 3280 unsigned int err_mask;
3280 int rc; 3281 int rc;
@@ -3338,6 +3339,14 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
3338 goto fail; 3339 goto fail;
3339 } 3340 }
3340 3341
3342 /*
3343 * Low level driver acked the transition. Issue DIPM command
3344 * with the new policy set.
3345 */
3346 link->lpm_policy = policy;
3347 if (ap && ap->slave_link)
3348 ap->slave_link->lpm_policy = policy;
3349
3341 /* host config updated, enable DIPM if transitioning to MIN_POWER */ 3350 /* host config updated, enable DIPM if transitioning to MIN_POWER */
3342 ata_for_each_dev(dev, link, ENABLED) { 3351 ata_for_each_dev(dev, link, ENABLED) {
3343 if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) { 3352 if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) {
@@ -3353,12 +3362,14 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
3353 } 3362 }
3354 } 3363 }
3355 3364
3356 link->lpm_policy = policy;
3357 if (ap && ap->slave_link)
3358 ap->slave_link->lpm_policy = policy;
3359 return 0; 3365 return 0;
3360 3366
3361fail: 3367fail:
3368 /* restore the old policy */
3369 link->lpm_policy = old_policy;
3370 if (ap && ap->slave_link)
3371 ap->slave_link->lpm_policy = old_policy;
3372
3362 /* if no device or only one more chance is left, disable LPM */ 3373 /* if no device or only one more chance is left, disable LPM */
3363 if (!dev || ehc->tries[dev->devno] <= 2) { 3374 if (!dev || ehc->tries[dev->devno] <= 2) {
3364 ata_link_printk(link, KERN_WARNING, 3375 ata_link_printk(link, KERN_WARNING,
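The LPM fix turns the policy update into a small transaction: remember the old policy, commit the new one before the DIPM commands are issued (so they observe the value they are configuring), and roll back on any failure. The shape, simplified to one link and hypothetical helper names:

enum lpm_policy { LPM_MAX_POWER, LPM_MED_POWER, LPM_MIN_POWER };

struct link { enum lpm_policy lpm_policy; };

extern int host_set_lpm(struct link *l, enum lpm_policy p); /* LLD ack */
extern int issue_dipm(struct link *l);      /* reads l->lpm_policy */

static int set_lpm(struct link *l, enum lpm_policy new_policy)
{
	enum lpm_policy old_policy = l->lpm_policy;   /* kept for rollback */
	int rc;

	rc = host_set_lpm(l, new_policy);
	if (rc)
		goto fail;

	l->lpm_policy = new_policy;   /* commit before DIPM sees it */
	rc = issue_dipm(l);
	if (rc)
		goto fail;

	return 0;
fail:
	l->lpm_policy = old_policy;   /* leave a consistent state behind */
	return rc;
}

In the real patch the same commit and rollback are mirrored onto ap->slave_link so both halves of a slave-link pair always agree.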
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index d05387d1e14b..484697fef386 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1532,11 +1532,10 @@ static unsigned int __ata_sff_port_intr(struct ata_port *ap,
1532 if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) 1532 if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR))
1533 return ata_sff_idle_irq(ap); 1533 return ata_sff_idle_irq(ap);
1534 break; 1534 break;
1535 case HSM_ST: 1535 case HSM_ST_IDLE:
1536 case HSM_ST_LAST:
1537 break;
1538 default:
1539 return ata_sff_idle_irq(ap); 1536 return ata_sff_idle_irq(ap);
1537 default:
1538 break;
1540 } 1539 }
1541 1540
1542 /* check main status, clearing INTRQ if needed */ 1541 /* check main status, clearing INTRQ if needed */
diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c
index 21ee23f89e88..628c8fae5937 100644
--- a/drivers/ata/pata_cs5536.c
+++ b/drivers/ata/pata_cs5536.c
@@ -37,10 +37,22 @@
37#include <linux/delay.h> 37#include <linux/delay.h>
38#include <linux/libata.h> 38#include <linux/libata.h>
39#include <scsi/scsi_host.h> 39#include <scsi/scsi_host.h>
40
41#ifdef CONFIG_X86_32
40#include <asm/msr.h> 42#include <asm/msr.h>
43static int use_msr;
44module_param_named(msr, use_msr, int, 0644);
45MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
46#else
47#undef rdmsr /* avoid accidental MSR usage on, e.g., x86-64 */
48#undef wrmsr
49#define rdmsr(x, y, z) do { } while (0)
50#define wrmsr(x, y, z) do { } while (0)
51#define use_msr 0
52#endif
41 53
42#define DRV_NAME "pata_cs5536" 54#define DRV_NAME "pata_cs5536"
43#define DRV_VERSION "0.0.7" 55#define DRV_VERSION "0.0.8"
44 56
45enum { 57enum {
46 CFG = 0, 58 CFG = 0,
@@ -75,8 +87,6 @@ enum {
75 IDE_ETC_NODMA = 0x03, 87 IDE_ETC_NODMA = 0x03,
76}; 88};
77 89
78static int use_msr;
79
80static const u32 msr_reg[4] = { 90static const u32 msr_reg[4] = {
81 MSR_IDE_CFG, MSR_IDE_DTC, MSR_IDE_CAST, MSR_IDE_ETC, 91 MSR_IDE_CFG, MSR_IDE_DTC, MSR_IDE_CAST, MSR_IDE_ETC,
82}; 92};
@@ -88,7 +98,7 @@ static const u8 pci_reg[4] = {
88static inline int cs5536_read(struct pci_dev *pdev, int reg, u32 *val) 98static inline int cs5536_read(struct pci_dev *pdev, int reg, u32 *val)
89{ 99{
90 if (unlikely(use_msr)) { 100 if (unlikely(use_msr)) {
91 u32 dummy; 101 u32 dummy __maybe_unused;
92 102
93 rdmsr(msr_reg[reg], *val, dummy); 103 rdmsr(msr_reg[reg], *val, dummy);
94 return 0; 104 return 0;
@@ -294,8 +304,6 @@ MODULE_DESCRIPTION("low-level driver for the CS5536 IDE controller");
294MODULE_LICENSE("GPL"); 304MODULE_LICENSE("GPL");
295MODULE_DEVICE_TABLE(pci, cs5536); 305MODULE_DEVICE_TABLE(pci, cs5536);
296MODULE_VERSION(DRV_VERSION); 306MODULE_VERSION(DRV_VERSION);
297module_param_named(msr, use_msr, int, 0644);
298MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
299 307
300module_init(cs5536_init); 308module_init(cs5536_init);
301module_exit(cs5536_exit); 309module_exit(cs5536_exit);
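Moving the module parameter under CONFIG_X86_32 and stubbing rdmsr/wrmsr keeps the rest of the driver free of #ifdefs: with use_msr fixed at 0 the compiler discards the MSR branches entirely. A self-contained model of that pattern (HAVE_MSR and the helper names are made up):

#include <stdio.h>

/* Compile-time feature stub, mirroring the pata_cs5536 approach: when
 * the feature is configured out, the flag becomes the constant 0 and
 * the compiler drops the branch, so callers need no #ifdef of their own. */
#ifdef HAVE_MSR
static int use_msr = 1;                    /* runtime-selectable */
static unsigned read_msr(unsigned reg) { return reg * 2; /* placeholder */ }
#else
#define use_msr 0                          /* constant-folds the branch away */
static unsigned read_msr(unsigned reg) { (void)reg; return 0; /* never called */ }
#endif

static unsigned read_reg(unsigned reg)
{
	if (use_msr)                       /* dead code when use_msr is 0 */
		return read_msr(reg);
	return reg;                        /* the PCI config path in the driver */
}

int main(void)
{
	printf("%u\n", read_reg(4));
	return 0;
}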
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index 2b464b631f22..0b0625054a87 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -392,7 +392,10 @@ static int atmtcp_attach(struct atm_vcc *vcc,int itf)
392 atm_dev_put(dev); 392 atm_dev_put(dev);
393 return -EMEDIUMTYPE; 393 return -EMEDIUMTYPE;
394 } 394 }
395 if (PRIV(dev)->vcc) return -EBUSY; 395 if (PRIV(dev)->vcc) {
396 atm_dev_put(dev);
397 return -EBUSY;
398 }
396 } 399 }
397 else { 400 else {
398 int error; 401 int error;
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 720148294e64..3c6cabcb7d84 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -311,8 +311,10 @@ static void hci_uart_tty_close(struct tty_struct *tty)
311 311
312 if (test_and_clear_bit(HCI_UART_PROTO_SET, &hu->flags)) { 312 if (test_and_clear_bit(HCI_UART_PROTO_SET, &hu->flags)) {
313 hu->proto->close(hu); 313 hu->proto->close(hu);
314 hci_unregister_dev(hdev); 314 if (hdev) {
315 hci_free_dev(hdev); 315 hci_unregister_dev(hdev);
316 hci_free_dev(hdev);
317 }
316 } 318 }
317 } 319 }
318} 320}
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 42396df55556..9252e85706ef 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -38,7 +38,7 @@ static int agp_bridges_found;
38 38
39static void amd64_tlbflush(struct agp_memory *temp) 39static void amd64_tlbflush(struct agp_memory *temp)
40{ 40{
41 k8_flush_garts(); 41 amd_flush_garts();
42} 42}
43 43
44static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type) 44static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
@@ -124,7 +124,7 @@ static int amd64_fetch_size(void)
124 u32 temp; 124 u32 temp;
125 struct aper_size_info_32 *values; 125 struct aper_size_info_32 *values;
126 126
127 dev = k8_northbridges.nb_misc[0]; 127 dev = node_to_amd_nb(0)->misc;
128 if (dev==NULL) 128 if (dev==NULL)
129 return 0; 129 return 0;
130 130
@@ -181,16 +181,15 @@ static int amd_8151_configure(void)
181 unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); 181 unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
182 int i; 182 int i;
183 183
184 if (!k8_northbridges.gart_supported) 184 if (!amd_nb_has_feature(AMD_NB_GART))
185 return 0; 185 return 0;
186 186
187 /* Configure AGP regs in each x86-64 host bridge. */ 187 /* Configure AGP regs in each x86-64 host bridge. */
188 for (i = 0; i < k8_northbridges.num; i++) { 188 for (i = 0; i < amd_nb_num(); i++) {
189 agp_bridge->gart_bus_addr = 189 agp_bridge->gart_bus_addr =
190 amd64_configure(k8_northbridges.nb_misc[i], 190 amd64_configure(node_to_amd_nb(i)->misc, gatt_bus);
191 gatt_bus);
192 } 191 }
193 k8_flush_garts(); 192 amd_flush_garts();
194 return 0; 193 return 0;
195} 194}
196 195
@@ -200,11 +199,11 @@ static void amd64_cleanup(void)
200 u32 tmp; 199 u32 tmp;
201 int i; 200 int i;
202 201
203 if (!k8_northbridges.gart_supported) 202 if (!amd_nb_has_feature(AMD_NB_GART))
204 return; 203 return;
205 204
206 for (i = 0; i < k8_northbridges.num; i++) { 205 for (i = 0; i < amd_nb_num(); i++) {
207 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 206 struct pci_dev *dev = node_to_amd_nb(i)->misc;
208 /* disable gart translation */ 207 /* disable gart translation */
209 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); 208 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp);
210 tmp &= ~GARTEN; 209 tmp &= ~GARTEN;
@@ -331,15 +330,15 @@ static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr)
331{ 330{
332 int i; 331 int i;
333 332
334 if (cache_k8_northbridges() < 0) 333 if (amd_cache_northbridges() < 0)
335 return -ENODEV; 334 return -ENODEV;
336 335
337 if (!k8_northbridges.gart_supported) 336 if (!amd_nb_has_feature(AMD_NB_GART))
338 return -ENODEV; 337 return -ENODEV;
339 338
340 i = 0; 339 i = 0;
341 for (i = 0; i < k8_northbridges.num; i++) { 340 for (i = 0; i < amd_nb_num(); i++) {
342 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 341 struct pci_dev *dev = node_to_amd_nb(i)->misc;
343 if (fix_northbridge(dev, pdev, cap_ptr) < 0) { 342 if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
344 dev_err(&dev->dev, "no usable aperture found\n"); 343 dev_err(&dev->dev, "no usable aperture found\n");
345#ifdef __x86_64__ 344#ifdef __x86_64__
@@ -416,7 +415,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
416 } 415 }
417 416
418 /* shadow x86-64 registers into ULi registers */ 417 /* shadow x86-64 registers into ULi registers */
419 pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, 418 pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE,
420 &httfea); 419 &httfea);
421 420
422 /* if x86-64 aperture base is beyond 4G, exit here */ 421 /* if x86-64 aperture base is beyond 4G, exit here */
@@ -484,7 +483,7 @@ static int nforce3_agp_init(struct pci_dev *pdev)
484 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); 483 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
485 484
486 /* shadow x86-64 registers into NVIDIA registers */ 485 /* shadow x86-64 registers into NVIDIA registers */
487 pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, 486 pci_read_config_dword (node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE,
488 &apbase); 487 &apbase);
489 488
490 /* if x86-64 aperture base is beyond 4G, exit here */ 489 /* if x86-64 aperture base is beyond 4G, exit here */
@@ -778,7 +777,7 @@ int __init agp_amd64_init(void)
778 } 777 }
779 778
780 /* First check that we have at least one AMD64 NB */ 779 /* First check that we have at least one AMD64 NB */
781 if (!pci_dev_present(k8_nb_ids)) 780 if (!pci_dev_present(amd_nb_misc_ids))
782 return -ENODEV; 781 return -ENODEV;
783 782
784 /* Look for any AGP bridge */ 783 /* Look for any AGP bridge */
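The rename swaps the exported k8_northbridges globals for accessor helpers, so consumers all follow one idiom: populate the northbridge array once, test features, then iterate. A sketch of a typical consumer after the conversion, built only from the helpers visible in the hunks above (kernel context, not standalone):

#include <linux/errno.h>
#include <linux/pci.h>
#include <asm/amd_nb.h>

static int init_gart_users(void)
{
	int i;

	if (amd_cache_northbridges() < 0)      /* populate the NB array once */
		return -ENODEV;

	if (!amd_nb_has_feature(AMD_NB_GART))  /* replaces .gart_supported */
		return -ENODEV;

	for (i = 0; i < amd_nb_num(); i++) {
		struct pci_dev *misc = node_to_amd_nb(i)->misc;

		/* program the per-node GART registers through 'misc' ... */
		(void)misc;
	}
	amd_flush_garts();                     /* replaces k8_flush_garts() */
	return 0;
}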
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 16a2847b7cdb..29ac6d499fa6 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -1192,12 +1192,19 @@ static void i9xx_chipset_flush(void)
1192 writel(1, intel_private.i9xx_flush_page); 1192 writel(1, intel_private.i9xx_flush_page);
1193} 1193}
1194 1194
1195static void i965_write_entry(dma_addr_t addr, unsigned int entry, 1195static void i965_write_entry(dma_addr_t addr,
1196 unsigned int entry,
1196 unsigned int flags) 1197 unsigned int flags)
1197{ 1198{
1199 u32 pte_flags;
1200
1201 pte_flags = I810_PTE_VALID;
1202 if (flags == AGP_USER_CACHED_MEMORY)
1203 pte_flags |= I830_PTE_SYSTEM_CACHED;
1204
1198 /* Shift high bits down */ 1205 /* Shift high bits down */
1199 addr |= (addr >> 28) & 0xf0; 1206 addr |= (addr >> 28) & 0xf0;
1200 writel(addr | I810_PTE_VALID, intel_private.gtt + entry); 1207 writel(addr | pte_flags, intel_private.gtt + entry);
1201} 1208}
1202 1209
1203static bool gen6_check_flags(unsigned int flags) 1210static bool gen6_check_flags(unsigned int flags)
diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c
index 73dcb0ee41fd..d3d63be2cd37 100644
--- a/drivers/char/ramoops.c
+++ b/drivers/char/ramoops.c
@@ -29,7 +29,6 @@
29#include <linux/ramoops.h> 29#include <linux/ramoops.h>
30 30
31#define RAMOOPS_KERNMSG_HDR "====" 31#define RAMOOPS_KERNMSG_HDR "===="
32#define RAMOOPS_HEADER_SIZE (5 + sizeof(struct timeval))
33 32
34#define RECORD_SIZE 4096 33#define RECORD_SIZE 4096
35 34
@@ -65,8 +64,8 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
65 struct ramoops_context, dump); 64 struct ramoops_context, dump);
66 unsigned long s1_start, s2_start; 65 unsigned long s1_start, s2_start;
67 unsigned long l1_cpy, l2_cpy; 66 unsigned long l1_cpy, l2_cpy;
68 int res; 67 int res, hdr_size;
69 char *buf; 68 char *buf, *buf_orig;
70 struct timeval timestamp; 69 struct timeval timestamp;
71 70
72 /* Only dump oopses if dump_oops is set */ 71 /* Only dump oopses if dump_oops is set */
@@ -74,6 +73,8 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
74 return; 73 return;
75 74
76 buf = (char *)(cxt->virt_addr + (cxt->count * RECORD_SIZE)); 75 buf = (char *)(cxt->virt_addr + (cxt->count * RECORD_SIZE));
76 buf_orig = buf;
77
77 memset(buf, '\0', RECORD_SIZE); 78 memset(buf, '\0', RECORD_SIZE);
78 res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR); 79 res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR);
79 buf += res; 80 buf += res;
@@ -81,8 +82,9 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
81 res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec); 82 res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec);
82 buf += res; 83 buf += res;
83 84
84 l2_cpy = min(l2, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE)); 85 hdr_size = buf - buf_orig;
85 l1_cpy = min(l1, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE) - l2_cpy); 86 l2_cpy = min(l2, (unsigned long)(RECORD_SIZE - hdr_size));
87 l1_cpy = min(l1, (unsigned long)(RECORD_SIZE - hdr_size) - l2_cpy);
86 88
87 s2_start = l2 - l2_cpy; 89 s2_start = l2 - l2_cpy;
88 s1_start = l1 - l1_cpy; 90 s1_start = l1 - l1_cpy;
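Instead of trusting a compile-time RAMOOPS_HEADER_SIZE guess, the patch measures the header after writing it: the pointer difference is the exact byte count, so the copy-size math can never drift from the real header. A small user-space model of the same bookkeeping:

#include <stdio.h>
#include <time.h>

#define RECORD_SIZE 4096

/* Write the header, then measure it: the pointer difference is the
 * exact size, replacing the old compile-time estimate. */
static size_t write_header(char *buf)
{
	char *p = buf;

	p += sprintf(p, "%s", "====");
	p += sprintf(p, "%ld.%ld\n", (long)time(NULL), 0L);
	return p - buf;
}

int main(void)
{
	char record[RECORD_SIZE] = "";
	size_t hdr_size = write_header(record);

	/* All later copy-size math uses the measured value. */
	printf("header: %zu bytes, payload room: %zu\n",
	       hdr_size, (size_t)RECORD_SIZE - hdr_size);
	return 0;
}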
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c63a43823744..1109f6848a43 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -355,6 +355,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
355 dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, 355 dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
356 (unsigned long)freqs->cpu); 356 (unsigned long)freqs->cpu);
357 trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); 357 trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
358 trace_cpu_frequency(freqs->new, freqs->cpu);
358 srcu_notifier_call_chain(&cpufreq_transition_notifier_list, 359 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
359 CPUFREQ_POSTCHANGE, freqs); 360 CPUFREQ_POSTCHANGE, freqs);
360 if (likely(policy) && likely(policy->cpu == freqs->cpu)) 361 if (likely(policy) && likely(policy->cpu == freqs->cpu))
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index a50710843378..08d5f05378d9 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -107,6 +107,7 @@ static void cpuidle_idle_call(void)
107 if (cpuidle_curr_governor->reflect) 107 if (cpuidle_curr_governor->reflect)
108 cpuidle_curr_governor->reflect(dev); 108 cpuidle_curr_governor->reflect(dev);
109 trace_power_end(smp_processor_id()); 109 trace_power_end(smp_processor_id());
110 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
110} 111}
111 112
112/** 113/**
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 411d5bf50fc4..a25f5f61e0e0 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -449,7 +449,7 @@ mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
449static void mv_xor_tasklet(unsigned long data) 449static void mv_xor_tasklet(unsigned long data)
450{ 450{
451 struct mv_xor_chan *chan = (struct mv_xor_chan *) data; 451 struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
452 __mv_xor_slot_cleanup(chan); 452 mv_xor_slot_cleanup(chan);
453} 453}
454 454
455static struct mv_xor_desc_slot * 455static struct mv_xor_desc_slot *
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index eca9ba193e94..df211181fca4 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2917,7 +2917,7 @@ static int __init amd64_edac_init(void)
2917 2917
2918 opstate_init(); 2918 opstate_init();
2919 2919
2920 if (cache_k8_northbridges() < 0) 2920 if (amd_cache_northbridges() < 0)
2921 goto err_ret; 2921 goto err_ret;
2922 2922
2923 msrs = msrs_alloc(); 2923 msrs = msrs_alloc();
@@ -2934,7 +2934,7 @@ static int __init amd64_edac_init(void)
2934 * to finish initialization of the MC instances. 2934 * to finish initialization of the MC instances.
2935 */ 2935 */
2936 err = -ENODEV; 2936 err = -ENODEV;
2937 for (nb = 0; nb < k8_northbridges.num; nb++) { 2937 for (nb = 0; nb < amd_nb_num(); nb++) {
2938 if (!pvt_lookup[nb]) 2938 if (!pvt_lookup[nb])
2939 continue; 2939 continue;
2940 2940
diff --git a/drivers/gpio/cs5535-gpio.c b/drivers/gpio/cs5535-gpio.c
index 599f6c9e0fbf..d3e55a0ae92b 100644
--- a/drivers/gpio/cs5535-gpio.c
+++ b/drivers/gpio/cs5535-gpio.c
@@ -56,15 +56,26 @@ static struct cs5535_gpio_chip {
56 * registers, see include/linux/cs5535.h. 56 * registers, see include/linux/cs5535.h.
57 */ 57 */
58 58
59static void errata_outl(u32 val, unsigned long addr) 59static void errata_outl(struct cs5535_gpio_chip *chip, u32 val,
60 unsigned int reg)
60{ 61{
62 unsigned long addr = chip->base + 0x80 + reg;
63
61 /* 64 /*
62 * According to the CS5536 errata (#36), after suspend 65 * According to the CS5536 errata (#36), after suspend
63 * a write to the high bank GPIO register will clear all 66 * a write to the high bank GPIO register will clear all
64 * non-selected bits; the recommended workaround is a 67 * non-selected bits; the recommended workaround is a
65 * read-modify-write operation. 68 * read-modify-write operation.
69 *
70 * Don't apply this errata to the edge status GPIOs, as writing
71 * to their lower bits will clear them.
66 */ 72 */
67 val |= inl(addr); 73 if (reg != GPIO_POSITIVE_EDGE_STS && reg != GPIO_NEGATIVE_EDGE_STS) {
74 if (val & 0xffff)
75 val |= (inl(addr) & 0xffff); /* ignore the high bits */
76 else
77 val |= (inl(addr) ^ (val >> 16));
78 }
68 outl(val, addr); 79 outl(val, addr);
69} 80}
70 81
@@ -76,7 +87,7 @@ static void __cs5535_gpio_set(struct cs5535_gpio_chip *chip, unsigned offset,
76 outl(1 << offset, chip->base + reg); 87 outl(1 << offset, chip->base + reg);
77 else 88 else
78 /* high bank register */ 89 /* high bank register */
79 errata_outl(1 << (offset - 16), chip->base + 0x80 + reg); 90 errata_outl(chip, 1 << (offset - 16), reg);
80} 91}
81 92
82void cs5535_gpio_set(unsigned offset, unsigned int reg) 93void cs5535_gpio_set(unsigned offset, unsigned int reg)
@@ -98,7 +109,7 @@ static void __cs5535_gpio_clear(struct cs5535_gpio_chip *chip, unsigned offset,
98 outl(1 << (offset + 16), chip->base + reg); 109 outl(1 << (offset + 16), chip->base + reg);
99 else 110 else
100 /* high bank register */ 111 /* high bank register */
101 errata_outl(1 << offset, chip->base + 0x80 + reg); 112 errata_outl(chip, 1 << offset, reg);
102} 113}
103 114
104void cs5535_gpio_clear(unsigned offset, unsigned int reg) 115void cs5535_gpio_clear(unsigned offset, unsigned int reg)
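The reworked errata helper still does the read-modify-write the CS5536 erratum calls for, but now leaves the edge-status registers alone, since writing their set bits back would acknowledge (clear) them. A toy model of why the plain write loses state and the RMW variant keeps it:

#include <stdio.h>
#include <stdint.h>

static uint32_t fake_reg = 0x00f0;	/* bits the hardware already has set */

/* Erratum model: a plain write clears every bit not being written. */
static void write_plain(uint32_t val) { fake_reg = val; }

/* Workaround: fold the current contents back in before writing. */
static void write_rmw(uint32_t val)   { fake_reg |= val; }

int main(void)
{
	write_plain(0x0001);
	printf("plain: %#06x  (0x00f0 lost)\n", fake_reg);

	fake_reg = 0x00f0;
	write_rmw(0x0001);
	printf("rmw:   %#06x  (0x00f0 kept)\n", fake_reg);
	return 0;
}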
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 21da9c19a0cb..649550e2cae9 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1281,6 +1281,9 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
1281 err = gpio_direction_output(gpio, 1281 err = gpio_direction_output(gpio,
1282 (flags & GPIOF_INIT_HIGH) ? 1 : 0); 1282 (flags & GPIOF_INIT_HIGH) ? 1 : 0);
1283 1283
1284 if (err)
1285 gpio_free(gpio);
1286
1284 return err; 1287 return err;
1285} 1288}
1286EXPORT_SYMBOL_GPL(gpio_request_one); 1289EXPORT_SYMBOL_GPL(gpio_request_one);
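With the added gpio_free(), gpio_request_one() becomes all-or-nothing: if setting the direction fails, the GPIO is released internally and the caller has nothing to undo. A kernel-style usage sketch (the GPIO number and label are made up):

#include <linux/gpio.h>

static int claim_led(unsigned gpio)
{
	/* Request + configure in one step; on failure the GPIO is
	 * released internally, so there is nothing to undo here. */
	int err = gpio_request_one(gpio, GPIOF_OUT_INIT_HIGH, "status-led");

	if (err)
		return err;	/* no gpio_free() needed on this path */

	gpio_set_value(gpio, 0);	/* use it; gpio_free() when done */
	return 0;
}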
diff --git a/drivers/gpio/rdc321x-gpio.c b/drivers/gpio/rdc321x-gpio.c
index 2762698e0204..897e0577e65e 100644
--- a/drivers/gpio/rdc321x-gpio.c
+++ b/drivers/gpio/rdc321x-gpio.c
@@ -135,7 +135,7 @@ static int __devinit rdc321x_gpio_probe(struct platform_device *pdev)
135 struct rdc321x_gpio *rdc321x_gpio_dev; 135 struct rdc321x_gpio *rdc321x_gpio_dev;
136 struct rdc321x_gpio_pdata *pdata; 136 struct rdc321x_gpio_pdata *pdata;
137 137
138 pdata = pdev->dev.platform_data; 138 pdata = platform_get_drvdata(pdev);
139 if (!pdata) { 139 if (!pdata) {
140 dev_err(&pdev->dev, "no platform data supplied\n"); 140 dev_err(&pdev->dev, "no platform data supplied\n");
141 return -ENODEV; 141 return -ENODEV;
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index bede10a03407..2d4e17a004db 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -241,7 +241,7 @@ void drm_helper_disable_unused_functions(struct drm_device *dev)
241 } 241 }
242 242
243 list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { 243 list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
244 if (encoder->crtc && !drm_helper_encoder_in_use(encoder)) { 244 if (!drm_helper_encoder_in_use(encoder)) {
245 drm_encoder_disable(encoder); 245 drm_encoder_disable(encoder);
246 /* disconnect encoder from any connector */ 246 /* disconnect encoder from any connector */
247 encoder->crtc = NULL; 247 encoder->crtc = NULL;
@@ -874,7 +874,10 @@ static void output_poll_execute(struct work_struct *work)
874 continue; 874 continue;
875 875
876 connector->status = connector->funcs->detect(connector, false); 876 connector->status = connector->funcs->detect(connector, false);
877 DRM_DEBUG_KMS("connector status updated to %d\n", connector->status); 877 DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %d to %d\n",
878 connector->base.id,
879 drm_get_connector_name(connector),
880 old_status, connector->status);
878 if (old_status != connector->status) 881 if (old_status != connector->status)
879 changed = true; 882 changed = true;
880 } 883 }
diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c
index af70337567ce..d3e8c540f778 100644
--- a/drivers/gpu/drm/i915/dvo_ch7017.c
+++ b/drivers/gpu/drm/i915/dvo_ch7017.c
@@ -242,7 +242,7 @@ fail:
242 242
243static enum drm_connector_status ch7017_detect(struct intel_dvo_device *dvo) 243static enum drm_connector_status ch7017_detect(struct intel_dvo_device *dvo)
244{ 244{
245 return connector_status_unknown; 245 return connector_status_connected;
246} 246}
247 247
248static enum drm_mode_status ch7017_mode_valid(struct intel_dvo_device *dvo, 248static enum drm_mode_status ch7017_mode_valid(struct intel_dvo_device *dvo,
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e6800819bca8..cb900dc83d95 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -34,6 +34,7 @@
34#include "i915_drm.h" 34#include "i915_drm.h"
35#include "i915_drv.h" 35#include "i915_drv.h"
36#include "i915_trace.h" 36#include "i915_trace.h"
37#include "../../../platform/x86/intel_ips.h"
37#include <linux/pci.h> 38#include <linux/pci.h>
38#include <linux/vgaarb.h> 39#include <linux/vgaarb.h>
39#include <linux/acpi.h> 40#include <linux/acpi.h>
@@ -1871,6 +1872,26 @@ out_unlock:
1871EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 1872EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
1872 1873
1873/** 1874/**
1875 * Tells the intel_ips driver that the i915 driver is now loaded, if
1876 * IPS got loaded first.
1877 *
1878 * This awkward dance is so that neither module has to depend on the
1879 * other in order for IPS to do the appropriate communication of
1880 * GPU turbo limits to i915.
1881 */
1882static void
1883ips_ping_for_i915_load(void)
1884{
1885 void (*link)(void);
1886
1887 link = symbol_get(ips_link_to_i915_driver);
1888 if (link) {
1889 link();
1890 symbol_put(ips_link_to_i915_driver);
1891 }
1892}
1893
1894/**
1874 * i915_driver_load - setup chip and create an initial config 1895 * i915_driver_load - setup chip and create an initial config
1875 * @dev: DRM device 1896 * @dev: DRM device
1876 * @flags: startup flags 1897 * @flags: startup flags
@@ -2075,6 +2096,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
2075 dev_priv->mchdev_lock = &mchdev_lock; 2096 dev_priv->mchdev_lock = &mchdev_lock;
2076 spin_unlock(&mchdev_lock); 2097 spin_unlock(&mchdev_lock);
2077 2098
2099 ips_ping_for_i915_load();
2100
2078 return 0; 2101 return 0;
2079 2102
2080out_workqueue_free: 2103out_workqueue_free:
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 878fc766a12c..cb8f43429279 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2471,6 +2471,9 @@
2471# define MARIUNIT_CLOCK_GATE_DISABLE (1 << 18) 2471# define MARIUNIT_CLOCK_GATE_DISABLE (1 << 18)
2472# define SVSMUNIT_CLOCK_GATE_DISABLE (1 << 1) 2472# define SVSMUNIT_CLOCK_GATE_DISABLE (1 << 1)
2473 2473
2474#define PCH_3DCGDIS1 0x46024
2475# define VFMUNIT_CLOCK_GATE_DISABLE (1 << 11)
2476
2474#define FDI_PLL_FREQ_CTL 0x46030 2477#define FDI_PLL_FREQ_CTL 0x46030
2475#define FDI_PLL_FREQ_CHANGE_REQUEST (1<<24) 2478#define FDI_PLL_FREQ_CHANGE_REQUEST (1<<24)
2476#define FDI_PLL_FREQ_LOCK_LIMIT_MASK 0xfff00 2479#define FDI_PLL_FREQ_LOCK_LIMIT_MASK 0xfff00
@@ -2588,6 +2591,13 @@
2588#define ILK_DISPLAY_CHICKEN2 0x42004 2591#define ILK_DISPLAY_CHICKEN2 0x42004
2589#define ILK_DPARB_GATE (1<<22) 2592#define ILK_DPARB_GATE (1<<22)
2590#define ILK_VSDPFD_FULL (1<<21) 2593#define ILK_VSDPFD_FULL (1<<21)
2594#define ILK_DISPLAY_CHICKEN_FUSES 0x42014
2595#define ILK_INTERNAL_GRAPHICS_DISABLE (1<<31)
2596#define ILK_INTERNAL_DISPLAY_DISABLE (1<<30)
2597#define ILK_DISPLAY_DEBUG_DISABLE (1<<29)
2598#define ILK_HDCP_DISABLE (1<<25)
2599#define ILK_eDP_A_DISABLE (1<<24)
2600#define ILK_DESKTOP (1<<23)
2591#define ILK_DSPCLK_GATE 0x42020 2601#define ILK_DSPCLK_GATE 0x42020
2592#define ILK_DPARB_CLK_GATE (1<<5) 2602#define ILK_DPARB_CLK_GATE (1<<5)
2593/* According to spec this bit 7/8/9 of 0x42020 should be set to enable FBC */ 2603/* According to spec this bit 7/8/9 of 0x42020 should be set to enable FBC */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d9b7092439ef..fca523288aca 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -5379,6 +5379,23 @@ static int intel_encoder_clones(struct drm_device *dev, int type_mask)
5379 return index_mask; 5379 return index_mask;
5380} 5380}
5381 5381
5382static bool has_edp_a(struct drm_device *dev)
5383{
5384 struct drm_i915_private *dev_priv = dev->dev_private;
5385
5386 if (!IS_MOBILE(dev))
5387 return false;
5388
5389 if ((I915_READ(DP_A) & DP_DETECTED) == 0)
5390 return false;
5391
5392 if (IS_GEN5(dev) &&
5393 (I915_READ(ILK_DISPLAY_CHICKEN_FUSES) & ILK_eDP_A_DISABLE))
5394 return false;
5395
5396 return true;
5397}
5398
5382static void intel_setup_outputs(struct drm_device *dev) 5399static void intel_setup_outputs(struct drm_device *dev)
5383{ 5400{
5384 struct drm_i915_private *dev_priv = dev->dev_private; 5401 struct drm_i915_private *dev_priv = dev->dev_private;
@@ -5396,7 +5413,7 @@ static void intel_setup_outputs(struct drm_device *dev)
5396 if (HAS_PCH_SPLIT(dev)) { 5413 if (HAS_PCH_SPLIT(dev)) {
5397 dpd_is_edp = intel_dpd_is_edp(dev); 5414 dpd_is_edp = intel_dpd_is_edp(dev);
5398 5415
5399 if (IS_MOBILE(dev) && (I915_READ(DP_A) & DP_DETECTED)) 5416 if (has_edp_a(dev))
5400 intel_dp_init(dev, DP_A); 5417 intel_dp_init(dev, DP_A);
5401 5418
5402 if (dpd_is_edp && (I915_READ(PCH_DP_D) & DP_DETECTED)) 5419 if (dpd_is_edp && (I915_READ(PCH_DP_D) & DP_DETECTED))
@@ -5825,6 +5842,8 @@ void intel_init_clock_gating(struct drm_device *dev)
5825 I915_WRITE(PCH_3DCGDIS0, 5842 I915_WRITE(PCH_3DCGDIS0,
5826 MARIUNIT_CLOCK_GATE_DISABLE | 5843 MARIUNIT_CLOCK_GATE_DISABLE |
5827 SVSMUNIT_CLOCK_GATE_DISABLE); 5844 SVSMUNIT_CLOCK_GATE_DISABLE);
5845 I915_WRITE(PCH_3DCGDIS1,
5846 VFMUNIT_CLOCK_GATE_DISABLE);
5828 } 5847 }
5829 5848
5830 I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate); 5849 I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index df648cb4c296..864417cffe9a 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -479,6 +479,7 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
479 uint16_t address = algo_data->address; 479 uint16_t address = algo_data->address;
480 uint8_t msg[5]; 480 uint8_t msg[5];
481 uint8_t reply[2]; 481 uint8_t reply[2];
482 unsigned retry;
482 int msg_bytes; 483 int msg_bytes;
483 int reply_bytes; 484 int reply_bytes;
484 int ret; 485 int ret;
@@ -513,14 +514,33 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
513 break; 514 break;
514 } 515 }
515 516
516 for (;;) { 517 for (retry = 0; retry < 5; retry++) {
517 ret = intel_dp_aux_ch(intel_dp, 518 ret = intel_dp_aux_ch(intel_dp,
518 msg, msg_bytes, 519 msg, msg_bytes,
519 reply, reply_bytes); 520 reply, reply_bytes);
520 if (ret < 0) { 521 if (ret < 0) {
521 DRM_DEBUG_KMS("aux_ch failed %d\n", ret); 522 DRM_DEBUG_KMS("aux_ch failed %d\n", ret);
522 return ret; 523 return ret;
523 } 524 }
525
526 switch (reply[0] & AUX_NATIVE_REPLY_MASK) {
527 case AUX_NATIVE_REPLY_ACK:
528 /* I2C-over-AUX Reply field is only valid
529 * when paired with AUX ACK.
530 */
531 break;
532 case AUX_NATIVE_REPLY_NACK:
533 DRM_DEBUG_KMS("aux_ch native nack\n");
534 return -EREMOTEIO;
535 case AUX_NATIVE_REPLY_DEFER:
536 udelay(100);
537 continue;
538 default:
539 DRM_ERROR("aux_ch invalid native reply 0x%02x\n",
540 reply[0]);
541 return -EREMOTEIO;
542 }
543
524 switch (reply[0] & AUX_I2C_REPLY_MASK) { 544 switch (reply[0] & AUX_I2C_REPLY_MASK) {
525 case AUX_I2C_REPLY_ACK: 545 case AUX_I2C_REPLY_ACK:
526 if (mode == MODE_I2C_READ) { 546 if (mode == MODE_I2C_READ) {
@@ -528,17 +548,20 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
528 } 548 }
529 return reply_bytes - 1; 549 return reply_bytes - 1;
530 case AUX_I2C_REPLY_NACK: 550 case AUX_I2C_REPLY_NACK:
531 DRM_DEBUG_KMS("aux_ch nack\n"); 551 DRM_DEBUG_KMS("aux_i2c nack\n");
532 return -EREMOTEIO; 552 return -EREMOTEIO;
533 case AUX_I2C_REPLY_DEFER: 553 case AUX_I2C_REPLY_DEFER:
534 DRM_DEBUG_KMS("aux_ch defer\n"); 554 DRM_DEBUG_KMS("aux_i2c defer\n");
535 udelay(100); 555 udelay(100);
536 break; 556 break;
537 default: 557 default:
538 DRM_ERROR("aux_ch invalid reply 0x%02x\n", reply[0]); 558 DRM_ERROR("aux_i2c invalid reply 0x%02x\n", reply[0]);
539 return -EREMOTEIO; 559 return -EREMOTEIO;
540 } 560 }
541 } 561 }
562
563 DRM_ERROR("too many retries, giving up\n");
564 return -EREMOTEIO;
542} 565}
543 566
544static int 567static int
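The rewritten loop decodes the reply in two layers and bounds the retries: the native AUX reply must ACK before the I2C-over-AUX reply field means anything, a DEFER at either layer retries after a delay, and five failed attempts end the transaction instead of spinning forever. The control flow, reduced to stub types:

enum native_reply { NATIVE_ACK, NATIVE_NACK, NATIVE_DEFER };
enum i2c_reply    { I2C_ACK, I2C_NACK, I2C_DEFER };

/* One AUX transaction; fills in both reply layers (stand-in). */
extern int aux_xfer(enum native_reply *nat, enum i2c_reply *i2c);

static int aux_i2c_xfer(void)
{
	enum native_reply nat;
	enum i2c_reply i2c;
	int retry;

	for (retry = 0; retry < 5; retry++) {
		if (aux_xfer(&nat, &i2c) < 0)
			return -1;

		if (nat == NATIVE_DEFER)
			continue;              /* sink busy: try again */
		if (nat != NATIVE_ACK)
			return -1;             /* native NACK: hard failure */

		/* The I2C reply field is only valid under a native ACK. */
		switch (i2c) {
		case I2C_ACK:   return 0;
		case I2C_NACK:  return -1;
		case I2C_DEFER: break;         /* device defer: retry */
		}
	}
	return -1;                             /* bounded: never spins forever */
}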
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 89a65be8a3f3..31cd7e33e820 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -696,20 +696,17 @@ int intel_wait_ring_buffer(struct drm_device *dev,
696 drm_i915_private_t *dev_priv = dev->dev_private; 696 drm_i915_private_t *dev_priv = dev->dev_private;
697 u32 head; 697 u32 head;
698 698
699 head = intel_read_status_page(ring, 4);
700 if (head) {
701 ring->head = head & HEAD_ADDR;
702 ring->space = ring->head - (ring->tail + 8);
703 if (ring->space < 0)
704 ring->space += ring->size;
705 if (ring->space >= n)
706 return 0;
707 }
708
709 trace_i915_ring_wait_begin (dev); 699 trace_i915_ring_wait_begin (dev);
710 end = jiffies + 3 * HZ; 700 end = jiffies + 3 * HZ;
711 do { 701 do {
712 ring->head = I915_READ_HEAD(ring) & HEAD_ADDR; 702 /* If the reported head position has wrapped or hasn't advanced,
703 * fall back to the slow and accurate path.
704 */
705 head = intel_read_status_page(ring, 4);
706 if (head < ring->actual_head)
707 head = I915_READ_HEAD(ring);
708 ring->actual_head = head;
709 ring->head = head & HEAD_ADDR;
713 ring->space = ring->head - (ring->tail + 8); 710 ring->space = ring->head - (ring->tail + 8);
714 if (ring->space < 0) 711 if (ring->space < 0)
715 ring->space += ring->size; 712 ring->space += ring->size;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3126c2681983..d2cd0f1efeed 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -30,8 +30,9 @@ struct intel_ring_buffer {
30 struct drm_device *dev; 30 struct drm_device *dev;
31 struct drm_gem_object *gem_object; 31 struct drm_gem_object *gem_object;
32 32
33 unsigned int head; 33 u32 actual_head;
34 unsigned int tail; 34 u32 head;
35 u32 tail;
35 int space; 36 int space;
36 struct intel_hw_status_page status_page; 37 struct intel_hw_status_page status_page;
37 38
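The new actual_head field implements a cheap-then-accurate read: the status-page snapshot is trusted while it advances monotonically, and a backwards step (wrap or staleness) forces a real MMIO read of the head register. Condensed, with stand-in accessors:

#include <stdint.h>

struct ring { uint32_t actual_head; };

extern uint32_t read_status_page(void);	/* cheap cached snapshot */
extern uint32_t read_head_register(void);	/* slow, authoritative MMIO */

static uint32_t current_head(struct ring *ring)
{
	uint32_t head = read_status_page();

	/* A backwards step means the snapshot wrapped or went stale:
	 * fall back to the accurate register read. */
	if (head < ring->actual_head)
		head = read_head_register();

	ring->actual_head = head;	/* remember for the next comparison */
	return head;
}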
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index d97e6cb52d34..6bc42fa2a6ec 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1908,9 +1908,12 @@ intel_sdvo_select_i2c_bus(struct drm_i915_private *dev_priv,
1908 speed = mapping->i2c_speed; 1908 speed = mapping->i2c_speed;
1909 } 1909 }
1910 1910
1911 sdvo->i2c = &dev_priv->gmbus[pin].adapter; 1911 if (pin < GMBUS_NUM_PORTS) {
1912 intel_gmbus_set_speed(sdvo->i2c, speed); 1912 sdvo->i2c = &dev_priv->gmbus[pin].adapter;
1913 intel_gmbus_force_bit(sdvo->i2c, true); 1913 intel_gmbus_set_speed(sdvo->i2c, speed);
1914 intel_gmbus_force_bit(sdvo->i2c, true);
1915 } else
1916 sdvo->i2c = &dev_priv->gmbus[GMBUS_PORT_DPB].adapter;
1914} 1917}
1915 1918
1916static bool 1919static bool
@@ -2037,13 +2040,14 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
2037 SDVO_COLORIMETRY_RGB256); 2040 SDVO_COLORIMETRY_RGB256);
2038 connector->connector_type = DRM_MODE_CONNECTOR_HDMIA; 2041 connector->connector_type = DRM_MODE_CONNECTOR_HDMIA;
2039 2042
2040 intel_sdvo_add_hdmi_properties(intel_sdvo_connector);
2041 intel_sdvo->is_hdmi = true; 2043 intel_sdvo->is_hdmi = true;
2042 } 2044 }
2043 intel_sdvo->base.clone_mask = ((1 << INTEL_SDVO_NON_TV_CLONE_BIT) | 2045 intel_sdvo->base.clone_mask = ((1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
2044 (1 << INTEL_ANALOG_CLONE_BIT)); 2046 (1 << INTEL_ANALOG_CLONE_BIT));
2045 2047
2046 intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo); 2048 intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo);
2049 if (intel_sdvo->is_hdmi)
2050 intel_sdvo_add_hdmi_properties(intel_sdvo_connector);
2047 2051
2048 return true; 2052 return true;
2049} 2053}
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index df2b6f2b35f8..9fbabaa6ee44 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -253,7 +253,8 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode)
253 case DRM_MODE_DPMS_SUSPEND: 253 case DRM_MODE_DPMS_SUSPEND:
254 case DRM_MODE_DPMS_OFF: 254 case DRM_MODE_DPMS_OFF:
255 drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id); 255 drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
256 atombios_blank_crtc(crtc, ATOM_ENABLE); 256 if (radeon_crtc->enabled)
257 atombios_blank_crtc(crtc, ATOM_ENABLE);
257 if (ASIC_IS_DCE3(rdev)) 258 if (ASIC_IS_DCE3(rdev))
258 atombios_enable_crtc_memreq(crtc, ATOM_DISABLE); 259 atombios_enable_crtc_memreq(crtc, ATOM_DISABLE);
259 atombios_enable_crtc(crtc, ATOM_DISABLE); 260 atombios_enable_crtc(crtc, ATOM_DISABLE);
@@ -530,7 +531,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
530 dp_clock = dig_connector->dp_clock; 531 dp_clock = dig_connector->dp_clock;
531 } 532 }
532 } 533 }
533 534#if 0 /* doesn't work properly on some laptops */
534 /* use recommended ref_div for ss */ 535 /* use recommended ref_div for ss */
535 if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { 536 if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
536 if (ss_enabled) { 537 if (ss_enabled) {
@@ -540,7 +541,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
540 } 541 }
541 } 542 }
542 } 543 }
543 544#endif
544 if (ASIC_IS_AVIVO(rdev)) { 545 if (ASIC_IS_AVIVO(rdev)) {
545 /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */ 546 /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */
546 if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1) 547 if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 4dc5b4714c5a..7b337c361a12 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -748,6 +748,8 @@ void evergreen_pcie_gart_tlb_flush(struct radeon_device *rdev)
748 unsigned i; 748 unsigned i;
749 u32 tmp; 749 u32 tmp;
750 750
751 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
752
751 WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1)); 753 WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
752 for (i = 0; i < rdev->usec_timeout; i++) { 754 for (i = 0; i < rdev->usec_timeout; i++) {
753 /* read MC_STATUS */ 755 /* read MC_STATUS */
@@ -1922,7 +1924,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
1922static int evergreen_gpu_soft_reset(struct radeon_device *rdev) 1924static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
1923{ 1925{
1924 struct evergreen_mc_save save; 1926 struct evergreen_mc_save save;
1925 u32 srbm_reset = 0;
1926 u32 grbm_reset = 0; 1927 u32 grbm_reset = 0;
1927 1928
1928 dev_info(rdev->dev, "GPU softreset \n"); 1929 dev_info(rdev->dev, "GPU softreset \n");
@@ -1961,16 +1962,6 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
1961 udelay(50); 1962 udelay(50);
1962 WREG32(GRBM_SOFT_RESET, 0); 1963 WREG32(GRBM_SOFT_RESET, 0);
1963 (void)RREG32(GRBM_SOFT_RESET); 1964 (void)RREG32(GRBM_SOFT_RESET);
1964
1965 /* reset all the system blocks */
1966 srbm_reset = SRBM_SOFT_RESET_ALL_MASK;
1967
1968 dev_info(rdev->dev, " SRBM_SOFT_RESET=0x%08X\n", srbm_reset);
1969 WREG32(SRBM_SOFT_RESET, srbm_reset);
1970 (void)RREG32(SRBM_SOFT_RESET);
1971 udelay(50);
1972 WREG32(SRBM_SOFT_RESET, 0);
1973 (void)RREG32(SRBM_SOFT_RESET);
1974 /* Wait a little for things to settle down */ 1965 /* Wait a little for things to settle down */
1975 udelay(50); 1966 udelay(50);
1976 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 1967 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
@@ -1981,10 +1972,6 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
1981 RREG32(GRBM_STATUS_SE1)); 1972 RREG32(GRBM_STATUS_SE1));
1982 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 1973 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1983 RREG32(SRBM_STATUS)); 1974 RREG32(SRBM_STATUS));
1984 /* After reset we need to reinit the asic as GPU often endup in an
1985 * incoherent state.
1986 */
1987 atom_asic_init(rdev->mode_info.atom_context);
1988 evergreen_mc_resume(rdev, &save); 1975 evergreen_mc_resume(rdev, &save);
1989 return 0; 1976 return 0;
1990} 1977}
@@ -2596,6 +2583,11 @@ int evergreen_resume(struct radeon_device *rdev)
2596{ 2583{
2597 int r; 2584 int r;
2598 2585
2586 /* reset the asic, the gfx blocks are often in a bad state
2587 * after the driver is unloaded or after a resume
2588 */
2589 if (radeon_asic_reset(rdev))
2590 dev_warn(rdev->dev, "GPU reset failed !\n");
2599 /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw, 2591 /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2600 * posting will perform necessary task to bring back GPU into good 2592 * posting will perform necessary task to bring back GPU into good
2601 * shape. 2593 * shape.
@@ -2712,6 +2704,11 @@ int evergreen_init(struct radeon_device *rdev)
2712 r = radeon_atombios_init(rdev); 2704 r = radeon_atombios_init(rdev);
2713 if (r) 2705 if (r)
2714 return r; 2706 return r;
2707 /* reset the asic, the gfx blocks are often in a bad state
2708 * after the driver is unloaded or after a resume
2709 */
2710 if (radeon_asic_reset(rdev))
2711 dev_warn(rdev->dev, "GPU reset failed !\n");
2715 /* Post card if necessary */ 2712 /* Post card if necessary */
2716 if (!evergreen_card_posted(rdev)) { 2713 if (!evergreen_card_posted(rdev)) {
2717 if (!rdev->bios) { 2714 if (!rdev->bios) {
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 113c70cc8b39..a73b53c44359 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -174,6 +174,7 @@
174#define HDP_NONSURFACE_BASE 0x2C04 174#define HDP_NONSURFACE_BASE 0x2C04
175#define HDP_NONSURFACE_INFO 0x2C08 175#define HDP_NONSURFACE_INFO 0x2C08
176#define HDP_NONSURFACE_SIZE 0x2C0C 176#define HDP_NONSURFACE_SIZE 0x2C0C
177#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x5480
177#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0 178#define HDP_REG_COHERENCY_FLUSH_CNTL 0x54A0
178#define HDP_TILING_CONFIG 0x2F3C 179#define HDP_TILING_CONFIG 0x2F3C
179 180
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 4d7a2e1bdb90..9c92db7c896b 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1342,13 +1342,19 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev)
1342 u32 srbm_status; 1342 u32 srbm_status;
1343 u32 grbm_status; 1343 u32 grbm_status;
1344 u32 grbm_status2; 1344 u32 grbm_status2;
1345 struct r100_gpu_lockup *lockup;
1345 int r; 1346 int r;
1346 1347
1348 if (rdev->family >= CHIP_RV770)
1349 lockup = &rdev->config.rv770.lockup;
1350 else
1351 lockup = &rdev->config.r600.lockup;
1352
1347 srbm_status = RREG32(R_000E50_SRBM_STATUS); 1353 srbm_status = RREG32(R_000E50_SRBM_STATUS);
1348 grbm_status = RREG32(R_008010_GRBM_STATUS); 1354 grbm_status = RREG32(R_008010_GRBM_STATUS);
1349 grbm_status2 = RREG32(R_008014_GRBM_STATUS2); 1355 grbm_status2 = RREG32(R_008014_GRBM_STATUS2);
1350 if (!G_008010_GUI_ACTIVE(grbm_status)) { 1356 if (!G_008010_GUI_ACTIVE(grbm_status)) {
1351 r100_gpu_lockup_update(&rdev->config.r300.lockup, &rdev->cp); 1357 r100_gpu_lockup_update(lockup, &rdev->cp);
1352 return false; 1358 return false;
1353 } 1359 }
1354 /* force CP activities */ 1360 /* force CP activities */
@@ -1360,7 +1366,7 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev)
1360 radeon_ring_unlock_commit(rdev); 1366 radeon_ring_unlock_commit(rdev);
1361 } 1367 }
1362 rdev->cp.rptr = RREG32(R600_CP_RB_RPTR); 1368 rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
1363 return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, &rdev->cp); 1369 return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp);
1364} 1370}
1365 1371
1366int r600_asic_reset(struct radeon_device *rdev) 1372int r600_asic_reset(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 0f90fc3482ce..7831e0890210 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -315,11 +315,10 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
315 if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) { 315 if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) {
316 /* the initial DDX does bad things with the CB size occasionally */ 316 /* the initial DDX does bad things with the CB size occasionally */
317 /* it rounds up height too far for slice tile max but the BO is smaller */ 317 /* it rounds up height too far for slice tile max but the BO is smaller */
318 tmp = (height - 7) * 8 * bpe; 318 /* r600c,g also seem to flush at bad times in some apps resulting in
319 if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) { 319 * bogus values here. So for linear just allow anything to avoid breaking
320 dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i])); 320 * broken userspace.
321 return -EINVAL; 321 */
322 }
323 } else { 322 } else {
324 dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i])); 323 dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]));
325 return -EINVAL; 324 return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e12e79326cb1..501966a13f48 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -910,11 +910,6 @@ int radeon_resume_kms(struct drm_device *dev)
910 radeon_pm_resume(rdev); 910 radeon_pm_resume(rdev);
911 radeon_restore_bios_scratch_regs(rdev); 911 radeon_restore_bios_scratch_regs(rdev);
912 912
913 /* turn on display hw */
914 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
915 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
916 }
917
918 radeon_fbdev_set_suspend(rdev, 0); 913 radeon_fbdev_set_suspend(rdev, 0);
919 release_console_sem(); 914 release_console_sem();
920 915
@@ -922,6 +917,10 @@ int radeon_resume_kms(struct drm_device *dev)
922 radeon_hpd_init(rdev); 917 radeon_hpd_init(rdev);
923 /* blat the mode back in */ 918 /* blat the mode back in */
924 drm_helper_resume_force_mode(dev); 919 drm_helper_resume_force_mode(dev);
920 /* turn on display hw */
921 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
922 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
923 }
925 return 0; 924 return 0;
926} 925}
927 926
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 88e4ea925900..60e689f2d048 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -232,9 +232,28 @@ static struct drm_driver driver_old = {
232 232
233static struct drm_driver kms_driver; 233static struct drm_driver kms_driver;
234 234
235static void radeon_kick_out_firmware_fb(struct pci_dev *pdev)
236{
237 struct apertures_struct *ap;
238 bool primary = false;
239
240 ap = alloc_apertures(1);
241 ap->ranges[0].base = pci_resource_start(pdev, 0);
242 ap->ranges[0].size = pci_resource_len(pdev, 0);
243
244#ifdef CONFIG_X86
245 primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
246#endif
247 remove_conflicting_framebuffers(ap, "radeondrmfb", primary);
248 kfree(ap);
249}
250
235static int __devinit 251static int __devinit
236radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 252radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
237{ 253{
254 /* Get rid of things like offb */
255 radeon_kick_out_firmware_fb(pdev);
256
238 return drm_get_pci_dev(pdev, ent, &kms_driver); 257 return drm_get_pci_dev(pdev, ent, &kms_driver);
239} 258}
240 259
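radeon_kick_out_firmware_fb() follows the standard eviction pattern: describe the PCI aperture the driver is about to own, then let remove_conflicting_framebuffers() unbind any generic framebuffer (offb, vesafb, ...) sitting on it. The same pattern for a hypothetical driver, with a NULL check the hunk above omits:

#include <linux/fb.h>
#include <linux/pci.h>
#include <linux/slab.h>

static void kick_out_firmware_fb(struct pci_dev *pdev)
{
	struct apertures_struct *ap = alloc_apertures(1);

	if (!ap)	/* defensive; the patch itself skips this check */
		return;

	/* Describe BAR 0, the aperture a firmware fb would sit on. */
	ap->ranges[0].base = pci_resource_start(pdev, 0);
	ap->ranges[0].size = pci_resource_len(pdev, 0);

	/* "mydrmfb" is a placeholder driver name. */
	remove_conflicting_framebuffers(ap, "mydrmfb", false);
	kfree(ap);
}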
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index efa211898fe6..6abea32be5e8 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -245,7 +245,7 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev,
245 goto out_unref; 245 goto out_unref;
246 } 246 }
247 info->apertures->ranges[0].base = rdev->ddev->mode_config.fb_base; 247 info->apertures->ranges[0].base = rdev->ddev->mode_config.fb_base;
248 info->apertures->ranges[0].size = rdev->mc.real_vram_size; 248 info->apertures->ranges[0].size = rdev->mc.aper_size;
249 249
250 info->fix.mmio_start = 0; 250 info->fix.mmio_start = 0;
251 info->fix.mmio_len = 0; 251 info->fix.mmio_len = 0;
diff --git a/drivers/hwmon/s3c-hwmon.c b/drivers/hwmon/s3c-hwmon.c
index 05248f2d7581..92b42db43bcf 100644
--- a/drivers/hwmon/s3c-hwmon.c
+++ b/drivers/hwmon/s3c-hwmon.c
@@ -234,7 +234,6 @@ static int s3c_hwmon_create_attr(struct device *dev,
234 attr->index = channel; 234 attr->index = channel;
235 attr->dev_attr.attr.name = attrs->in_name; 235 attr->dev_attr.attr.name = attrs->in_name;
236 attr->dev_attr.attr.mode = S_IRUGO; 236 attr->dev_attr.attr.mode = S_IRUGO;
237 attr->dev_attr.attr.owner = THIS_MODULE;
238 attr->dev_attr.show = s3c_hwmon_ch_show; 237 attr->dev_attr.show = s3c_hwmon_ch_show;
239 238
240 ret = device_create_file(dev, &attr->dev_attr); 239 ret = device_create_file(dev, &attr->dev_attr);
@@ -252,7 +251,6 @@ static int s3c_hwmon_create_attr(struct device *dev,
252 attr->index = channel; 251 attr->index = channel;
253 attr->dev_attr.attr.name = attrs->label_name; 252 attr->dev_attr.attr.name = attrs->label_name;
254 attr->dev_attr.attr.mode = S_IRUGO; 253 attr->dev_attr.attr.mode = S_IRUGO;
255 attr->dev_attr.attr.owner = THIS_MODULE;
256 attr->dev_attr.show = s3c_hwmon_label_show; 254 attr->dev_attr.show = s3c_hwmon_label_show;
257 255
258 ret = device_create_file(dev, &attr->dev_attr); 256 ret = device_create_file(dev, &attr->dev_attr);
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index c131d58bcb50..56ac09d6c930 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -220,9 +220,8 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
220 kt_before = ktime_get_real(); 220 kt_before = ktime_get_real();
221 221
222 stop_critical_timings(); 222 stop_critical_timings();
223#ifndef MODULE
224 trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); 223 trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
225#endif 224 trace_cpu_idle((eax >> 4) + 1, cpu);
226 if (!need_resched()) { 225 if (!need_resched()) {
227 226
228 __monitor((void *)&current_thread_info()->flags, 0, 0); 227 __monitor((void *)&current_thread_info()->flags, 0, 0);
diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c
index bcc174e4f3b1..658e75f18d05 100644
--- a/drivers/isdn/gigaset/capi.c
+++ b/drivers/isdn/gigaset/capi.c
@@ -1900,6 +1900,7 @@ static void do_disconnect_req(struct gigaset_capi_ctr *iif,
1900 if (b3skb == NULL) { 1900 if (b3skb == NULL) {
1901 dev_err(cs->dev, "%s: out of memory\n", __func__); 1901 dev_err(cs->dev, "%s: out of memory\n", __func__);
1902 send_conf(iif, ap, skb, CAPI_MSGOSRESOURCEERR); 1902 send_conf(iif, ap, skb, CAPI_MSGOSRESOURCEERR);
1903 kfree(b3cmsg);
1903 return; 1904 return;
1904 } 1905 }
1905 capi_cmsg2message(b3cmsg, 1906 capi_cmsg2message(b3cmsg,
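
The added kfree(b3cmsg) plugs a leak: the CAPI message was allocated a few lines earlier, and the out-of-memory path for the skb returned without releasing it. A hedged sketch of the general unwind rule, with illustrative names rather than the gigaset ones:

#include <linux/skbuff.h>
#include <linux/slab.h>

struct example_msg { int code; };

static int example_send(void)
{
	struct example_msg *msg;
	struct sk_buff *skb;

	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	skb = alloc_skb(64, GFP_KERNEL);
	if (!skb) {
		kfree(msg);	/* the unwind this patch adds */
		return -ENOMEM;
	}

	/* ... encode msg into skb and queue it ... */
	kfree_skb(skb);
	kfree(msg);
	return 0;
}
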
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 211e21f34bd5..d5a4ade88991 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -267,7 +267,7 @@ void led_blink_set(struct led_classdev *led_cdev,
267 unsigned long *delay_off) 267 unsigned long *delay_off)
268{ 268{
269 if (led_cdev->blink_set && 269 if (led_cdev->blink_set &&
270 led_cdev->blink_set(led_cdev, delay_on, delay_off)) 270 !led_cdev->blink_set(led_cdev, delay_on, delay_off))
271 return; 271 return;
272 272
273 /* blink with 1 Hz as default if nothing specified */ 273 /* blink with 1 Hz as default if nothing specified */
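
The one-character fix above matters: blink_set() returns 0 when the hardware accepted the blink request, so the old test returned early on failure and fell through to software blinking exactly when it should not have. A hedged sketch of the corrected control flow, with the timer arming elided:

#include <linux/leds.h>

static void example_blink_set(struct led_classdev *led_cdev,
			      unsigned long *delay_on,
			      unsigned long *delay_off)
{
	/* 0 from the hook means the hardware is blinking on its own */
	if (led_cdev->blink_set &&
	    !led_cdev->blink_set(led_cdev, delay_on, delay_off))
		return;

	/* blink with 1 Hz as default if nothing specified */
	if (!*delay_on && !*delay_off)
		*delay_on = *delay_off = 500;

	/* ... otherwise arm the software blink timer ... */
}
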
diff --git a/drivers/media/IR/keymaps/rc-rc6-mce.c b/drivers/media/IR/keymaps/rc-rc6-mce.c
index 1b7adabbcee9..6da955dfef48 100644
--- a/drivers/media/IR/keymaps/rc-rc6-mce.c
+++ b/drivers/media/IR/keymaps/rc-rc6-mce.c
@@ -26,8 +26,8 @@ static struct ir_scancode rc6_mce[] = {
26 26
27 { 0x800f040a, KEY_DELETE }, 27 { 0x800f040a, KEY_DELETE },
28 { 0x800f040b, KEY_ENTER }, 28 { 0x800f040b, KEY_ENTER },
29 { 0x800f040c, KEY_POWER }, 29 { 0x800f040c, KEY_POWER }, /* PC Power */
30 { 0x800f040d, KEY_PROG1 }, /* Windows MCE button */ 30 { 0x800f040d, KEY_PROG1 }, /* Windows MCE button */
31 { 0x800f040e, KEY_MUTE }, 31 { 0x800f040e, KEY_MUTE },
32 { 0x800f040f, KEY_INFO }, 32 { 0x800f040f, KEY_INFO },
33 33
@@ -56,31 +56,32 @@ static struct ir_scancode rc6_mce[] = {
56 { 0x800f0422, KEY_OK }, 56 { 0x800f0422, KEY_OK },
57 { 0x800f0423, KEY_EXIT }, 57 { 0x800f0423, KEY_EXIT },
58 { 0x800f0424, KEY_DVD }, 58 { 0x800f0424, KEY_DVD },
59 { 0x800f0425, KEY_TUNER }, /* LiveTV */ 59 { 0x800f0425, KEY_TUNER }, /* LiveTV */
60 { 0x800f0426, KEY_EPG }, /* Guide */ 60 { 0x800f0426, KEY_EPG }, /* Guide */
61 { 0x800f0427, KEY_ZOOM }, /* Aspect */ 61 { 0x800f0427, KEY_ZOOM }, /* Aspect */
62 62
63 { 0x800f043a, KEY_BRIGHTNESSUP }, 63 { 0x800f043a, KEY_BRIGHTNESSUP },
64 64
65 { 0x800f0446, KEY_TV }, 65 { 0x800f0446, KEY_TV },
66 { 0x800f0447, KEY_AUDIO }, /* My Music */ 66 { 0x800f0447, KEY_AUDIO }, /* My Music */
67 { 0x800f0448, KEY_PVR }, /* RecordedTV */ 67 { 0x800f0448, KEY_PVR }, /* RecordedTV */
68 { 0x800f0449, KEY_CAMERA }, 68 { 0x800f0449, KEY_CAMERA },
69 { 0x800f044a, KEY_VIDEO }, 69 { 0x800f044a, KEY_VIDEO },
70 { 0x800f044c, KEY_LANGUAGE }, 70 { 0x800f044c, KEY_LANGUAGE },
71 { 0x800f044d, KEY_TITLE }, 71 { 0x800f044d, KEY_TITLE },
72 { 0x800f044e, KEY_PRINT }, /* Print - HP OEM version of remote */ 72 { 0x800f044e, KEY_PRINT }, /* Print - HP OEM version of remote */
73 73
74 { 0x800f0450, KEY_RADIO }, 74 { 0x800f0450, KEY_RADIO },
75 75
76 { 0x800f045a, KEY_SUBTITLE }, /* Caption/Teletext */ 76 { 0x800f045a, KEY_SUBTITLE }, /* Caption/Teletext */
77 { 0x800f045b, KEY_RED }, 77 { 0x800f045b, KEY_RED },
78 { 0x800f045c, KEY_GREEN }, 78 { 0x800f045c, KEY_GREEN },
79 { 0x800f045d, KEY_YELLOW }, 79 { 0x800f045d, KEY_YELLOW },
80 { 0x800f045e, KEY_BLUE }, 80 { 0x800f045e, KEY_BLUE },
81 81
82 { 0x800f0465, KEY_POWER2 }, /* TV Power */
82 { 0x800f046e, KEY_PLAYPAUSE }, 83 { 0x800f046e, KEY_PLAYPAUSE },
83 { 0x800f046f, KEY_MEDIA }, /* Start media application (NEW) */ 84 { 0x800f046f, KEY_MEDIA }, /* Start media application (NEW) */
84 85
85 { 0x800f0480, KEY_BRIGHTNESSDOWN }, 86 { 0x800f0480, KEY_BRIGHTNESSDOWN },
86 { 0x800f0481, KEY_PLAYPAUSE }, 87 { 0x800f0481, KEY_PLAYPAUSE },
diff --git a/drivers/media/IR/lirc_dev.c b/drivers/media/IR/lirc_dev.c
index 8418b14ee4d2..756656e17bdd 100644
--- a/drivers/media/IR/lirc_dev.c
+++ b/drivers/media/IR/lirc_dev.c
@@ -522,10 +522,8 @@ unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait)
522 522
523 dev_dbg(ir->d.dev, LOGHEAD "poll called\n", ir->d.name, ir->d.minor); 523 dev_dbg(ir->d.dev, LOGHEAD "poll called\n", ir->d.name, ir->d.minor);
524 524
525 if (!ir->attached) { 525 if (!ir->attached)
526 mutex_unlock(&ir->irctl_lock);
527 return POLLERR; 526 return POLLERR;
528 }
529 527
530 poll_wait(file, &ir->buf->wait_poll, wait); 528 poll_wait(file, &ir->buf->wait_poll, wait);
531 529
@@ -649,18 +647,18 @@ ssize_t lirc_dev_fop_read(struct file *file,
649 if (!buf) 647 if (!buf)
650 return -ENOMEM; 648 return -ENOMEM;
651 649
652 if (mutex_lock_interruptible(&ir->irctl_lock)) 650 if (mutex_lock_interruptible(&ir->irctl_lock)) {
653 return -ERESTARTSYS; 651 ret = -ERESTARTSYS;
652 goto out_unlocked;
653 }
654 if (!ir->attached) { 654 if (!ir->attached) {
655 mutex_unlock(&ir->irctl_lock); 655 ret = -ENODEV;
656 return -ENODEV; 656 goto out_locked;
657 } 657 }
658 658
659 if (length % ir->chunk_size) { 659 if (length % ir->chunk_size) {
660 dev_dbg(ir->d.dev, LOGHEAD "read result = -EINVAL\n", 660 ret = -EINVAL;
661 ir->d.name, ir->d.minor); 661 goto out_locked;
662 mutex_unlock(&ir->irctl_lock);
663 return -EINVAL;
664 } 662 }
665 663
666 /* 664 /*
@@ -711,18 +709,23 @@ ssize_t lirc_dev_fop_read(struct file *file,
711 lirc_buffer_read(ir->buf, buf); 709 lirc_buffer_read(ir->buf, buf);
712 ret = copy_to_user((void *)buffer+written, buf, 710 ret = copy_to_user((void *)buffer+written, buf,
713 ir->buf->chunk_size); 711 ir->buf->chunk_size);
714 written += ir->buf->chunk_size; 712 if (!ret)
713 written += ir->buf->chunk_size;
714 else
715 ret = -EFAULT;
715 } 716 }
716 } 717 }
717 718
718 remove_wait_queue(&ir->buf->wait_poll, &wait); 719 remove_wait_queue(&ir->buf->wait_poll, &wait);
719 set_current_state(TASK_RUNNING); 720 set_current_state(TASK_RUNNING);
721
722out_locked:
720 mutex_unlock(&ir->irctl_lock); 723 mutex_unlock(&ir->irctl_lock);
721 724
722out_unlocked: 725out_unlocked:
723 kfree(buf); 726 kfree(buf);
724 dev_dbg(ir->d.dev, LOGHEAD "read result = %s (%d)\n", 727 dev_dbg(ir->d.dev, LOGHEAD "read result = %s (%d)\n",
725 ir->d.name, ir->d.minor, ret ? "-EFAULT" : "OK", ret); 728 ir->d.name, ir->d.minor, ret ? "<fail>" : "<ok>", ret);
726 729
727 return ret ? ret : written; 730 return ret ? ret : written;
728} 731}
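
Two things change in lirc_dev_fop_read() above: every early return is funneled through the out_locked/out_unlocked labels so the mutex and the temporary buffer are always released, and the copy_to_user() result is finally checked instead of assumed to succeed. A hedged sketch of that shape with illustrative names:

#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

static ssize_t example_read(struct mutex *lock, char __user *dst, size_t n)
{
	char *buf;
	ssize_t ret = 0;

	buf = kzalloc(n, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (mutex_lock_interruptible(lock)) {
		ret = -ERESTARTSYS;
		goto out_unlocked;
	}

	/* ... fill buf from the device ... */
	if (copy_to_user(dst, buf, n))
		ret = -EFAULT;	/* nonzero means bytes were left uncopied */

	mutex_unlock(lock);
out_unlocked:
	kfree(buf);
	return ret ? ret : n;
}
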
diff --git a/drivers/media/IR/mceusb.c b/drivers/media/IR/mceusb.c
index 9dce684fd231..392ca24132da 100644
--- a/drivers/media/IR/mceusb.c
+++ b/drivers/media/IR/mceusb.c
@@ -35,10 +35,10 @@
35#include <linux/device.h> 35#include <linux/device.h>
36#include <linux/module.h> 36#include <linux/module.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/usb.h>
39#include <linux/input.h> 38#include <linux/input.h>
39#include <linux/usb.h>
40#include <linux/usb/input.h>
40#include <media/ir-core.h> 41#include <media/ir-core.h>
41#include <media/ir-common.h>
42 42
43#define DRIVER_VERSION "1.91" 43#define DRIVER_VERSION "1.91"
44#define DRIVER_AUTHOR "Jarod Wilson <jarod@wilsonet.com>" 44#define DRIVER_AUTHOR "Jarod Wilson <jarod@wilsonet.com>"
@@ -49,6 +49,7 @@
49#define USB_BUFLEN 32 /* USB reception buffer length */ 49#define USB_BUFLEN 32 /* USB reception buffer length */
50#define USB_CTRL_MSG_SZ 2 /* Size of usb ctrl msg on gen1 hw */ 50#define USB_CTRL_MSG_SZ 2 /* Size of usb ctrl msg on gen1 hw */
51#define MCE_G1_INIT_MSGS 40 /* Init messages on gen1 hw to throw out */ 51#define MCE_G1_INIT_MSGS 40 /* Init messages on gen1 hw to throw out */
52#define MS_TO_NS(msec) ((msec) * 1000)
52 53
53/* MCE constants */ 54/* MCE constants */
54#define MCE_CMDBUF_SIZE 384 /* MCE Command buffer length */ 55#define MCE_CMDBUF_SIZE 384 /* MCE Command buffer length */
@@ -74,6 +75,7 @@
74#define MCE_PACKET_LENGTH_MASK 0x1f /* Packet length mask */ 75#define MCE_PACKET_LENGTH_MASK 0x1f /* Packet length mask */
75 76
76/* Sub-commands, which follow MCE_COMMAND_HEADER or MCE_HW_CMD_HEADER */ 77/* Sub-commands, which follow MCE_COMMAND_HEADER or MCE_HW_CMD_HEADER */
78#define MCE_CMD_SIG_END 0x01 /* End of signal */
77#define MCE_CMD_PING 0x03 /* Ping device */ 79#define MCE_CMD_PING 0x03 /* Ping device */
78#define MCE_CMD_UNKNOWN 0x04 /* Unknown */ 80#define MCE_CMD_UNKNOWN 0x04 /* Unknown */
79#define MCE_CMD_UNKNOWN2 0x05 /* Unknown */ 81#define MCE_CMD_UNKNOWN2 0x05 /* Unknown */
@@ -91,6 +93,7 @@
91#define MCE_CMD_G_TXMASK 0x13 /* Set TX port bitmask */ 93#define MCE_CMD_G_TXMASK 0x13 /* Set TX port bitmask */
92#define MCE_CMD_S_RXSENSOR 0x14 /* Set RX sensor (std/learning) */ 94#define MCE_CMD_S_RXSENSOR 0x14 /* Set RX sensor (std/learning) */
93#define MCE_CMD_G_RXSENSOR 0x15 /* Get RX sensor (std/learning) */ 95#define MCE_CMD_G_RXSENSOR 0x15 /* Get RX sensor (std/learning) */
96#define MCE_RSP_PULSE_COUNT 0x15 /* RX pulse count (only if learning) */
94#define MCE_CMD_TX_PORTS 0x16 /* Get number of TX ports */ 97#define MCE_CMD_TX_PORTS 0x16 /* Get number of TX ports */
95#define MCE_CMD_G_WAKESRC 0x17 /* Get wake source */ 98#define MCE_CMD_G_WAKESRC 0x17 /* Get wake source */
96#define MCE_CMD_UNKNOWN7 0x18 /* Unknown */ 99#define MCE_CMD_UNKNOWN7 0x18 /* Unknown */
@@ -146,14 +149,16 @@ enum mceusb_model_type {
146 MCE_GEN3, 149 MCE_GEN3,
147 MCE_GEN2_TX_INV, 150 MCE_GEN2_TX_INV,
148 POLARIS_EVK, 151 POLARIS_EVK,
152 CX_HYBRID_TV,
149}; 153};
150 154
151struct mceusb_model { 155struct mceusb_model {
152 u32 mce_gen1:1; 156 u32 mce_gen1:1;
153 u32 mce_gen2:1; 157 u32 mce_gen2:1;
154 u32 mce_gen3:1; 158 u32 mce_gen3:1;
155 u32 tx_mask_inverted:1; 159 u32 tx_mask_normal:1;
156 u32 is_polaris:1; 160 u32 is_polaris:1;
161 u32 no_tx:1;
157 162
158 const char *rc_map; /* Allow specify a per-board map */ 163 const char *rc_map; /* Allow specify a per-board map */
159 const char *name; /* per-board name */ 164 const char *name; /* per-board name */
@@ -162,18 +167,18 @@ struct mceusb_model {
162static const struct mceusb_model mceusb_model[] = { 167static const struct mceusb_model mceusb_model[] = {
163 [MCE_GEN1] = { 168 [MCE_GEN1] = {
164 .mce_gen1 = 1, 169 .mce_gen1 = 1,
165 .tx_mask_inverted = 1, 170 .tx_mask_normal = 1,
166 }, 171 },
167 [MCE_GEN2] = { 172 [MCE_GEN2] = {
168 .mce_gen2 = 1, 173 .mce_gen2 = 1,
169 }, 174 },
170 [MCE_GEN2_TX_INV] = { 175 [MCE_GEN2_TX_INV] = {
171 .mce_gen2 = 1, 176 .mce_gen2 = 1,
172 .tx_mask_inverted = 1, 177 .tx_mask_normal = 1,
173 }, 178 },
174 [MCE_GEN3] = { 179 [MCE_GEN3] = {
175 .mce_gen3 = 1, 180 .mce_gen3 = 1,
176 .tx_mask_inverted = 1, 181 .tx_mask_normal = 1,
177 }, 182 },
178 [POLARIS_EVK] = { 183 [POLARIS_EVK] = {
179 .is_polaris = 1, 184 .is_polaris = 1,
@@ -183,7 +188,12 @@ static const struct mceusb_model mceusb_model[] = {
183 * to allow testing it 188 * to allow testing it
184 */ 189 */
185 .rc_map = RC_MAP_RC5_HAUPPAUGE_NEW, 190 .rc_map = RC_MAP_RC5_HAUPPAUGE_NEW,
186 .name = "cx231xx MCE IR", 191 .name = "Conexant Hybrid TV (cx231xx) MCE IR",
192 },
193 [CX_HYBRID_TV] = {
194 .is_polaris = 1,
195 .no_tx = 1, /* tx isn't wired up at all */
196 .name = "Conexant Hybrid TV (cx231xx) MCE IR",
187 }, 197 },
188}; 198};
189 199
@@ -273,6 +283,8 @@ static struct usb_device_id mceusb_dev_table[] = {
273 { USB_DEVICE(VENDOR_FORMOSA, 0xe03c) }, 283 { USB_DEVICE(VENDOR_FORMOSA, 0xe03c) },
274 /* Formosa Industrial Computing */ 284 /* Formosa Industrial Computing */
275 { USB_DEVICE(VENDOR_FORMOSA, 0xe03e) }, 285 { USB_DEVICE(VENDOR_FORMOSA, 0xe03e) },
286 /* Fintek eHome Infrared Transceiver (HP branded) */
287 { USB_DEVICE(VENDOR_FINTEK, 0x5168) },
276 /* Fintek eHome Infrared Transceiver */ 288 /* Fintek eHome Infrared Transceiver */
277 { USB_DEVICE(VENDOR_FINTEK, 0x0602) }, 289 { USB_DEVICE(VENDOR_FINTEK, 0x0602) },
278 /* Fintek eHome Infrared Transceiver (in the AOpen MP45) */ 290 /* Fintek eHome Infrared Transceiver (in the AOpen MP45) */
@@ -292,9 +304,12 @@ static struct usb_device_id mceusb_dev_table[] = {
292 { USB_DEVICE(VENDOR_NORTHSTAR, 0xe004) }, 304 { USB_DEVICE(VENDOR_NORTHSTAR, 0xe004) },
293 /* TiVo PC IR Receiver */ 305 /* TiVo PC IR Receiver */
294 { USB_DEVICE(VENDOR_TIVO, 0x2000) }, 306 { USB_DEVICE(VENDOR_TIVO, 0x2000) },
295 /* Conexant SDK */ 307 /* Conexant Hybrid TV "Shelby" Polaris SDK */
296 { USB_DEVICE(VENDOR_CONEXANT, 0x58a1), 308 { USB_DEVICE(VENDOR_CONEXANT, 0x58a1),
297 .driver_info = POLARIS_EVK }, 309 .driver_info = POLARIS_EVK },
310 /* Conexant Hybrid TV RDU253S Polaris */
311 { USB_DEVICE(VENDOR_CONEXANT, 0x58a5),
312 .driver_info = CX_HYBRID_TV },
298 /* Terminating entry */ 313 /* Terminating entry */
299 { } 314 { }
300}; 315};
@@ -303,7 +318,10 @@ static struct usb_device_id mceusb_dev_table[] = {
303struct mceusb_dev { 318struct mceusb_dev {
304 /* ir-core bits */ 319 /* ir-core bits */
305 struct ir_dev_props *props; 320 struct ir_dev_props *props;
306 struct ir_raw_event rawir; 321
322 /* optional features we can enable */
323 bool carrier_report_enabled;
324 bool learning_enabled;
307 325
308 /* core device bits */ 326 /* core device bits */
309 struct device *dev; 327 struct device *dev;
@@ -318,6 +336,8 @@ struct mceusb_dev {
318 /* buffers and dma */ 336 /* buffers and dma */
319 unsigned char *buf_in; 337 unsigned char *buf_in;
320 unsigned int len_in; 338 unsigned int len_in;
339 dma_addr_t dma_in;
340 dma_addr_t dma_out;
321 341
322 enum { 342 enum {
323 CMD_HEADER = 0, 343 CMD_HEADER = 0,
@@ -325,15 +345,14 @@ struct mceusb_dev {
325 CMD_DATA, 345 CMD_DATA,
326 PARSE_IRDATA, 346 PARSE_IRDATA,
327 } parser_state; 347 } parser_state;
328 u8 cmd, rem; /* Remaining IR data bytes in packet */
329 348
330 dma_addr_t dma_in; 349 u8 cmd, rem; /* Remaining IR data bytes in packet */
331 dma_addr_t dma_out;
332 350
333 struct { 351 struct {
334 u32 connected:1; 352 u32 connected:1;
335 u32 tx_mask_inverted:1; 353 u32 tx_mask_normal:1;
336 u32 microsoft_gen1:1; 354 u32 microsoft_gen1:1;
355 u32 no_tx:1;
337 } flags; 356 } flags;
338 357
339 /* transmit support */ 358 /* transmit support */
@@ -408,9 +427,10 @@ static int mceusb_cmdsize(u8 cmd, u8 subcmd)
408 case MCE_CMD_UNKNOWN: 427 case MCE_CMD_UNKNOWN:
409 case MCE_CMD_S_CARRIER: 428 case MCE_CMD_S_CARRIER:
410 case MCE_CMD_S_TIMEOUT: 429 case MCE_CMD_S_TIMEOUT:
411 case MCE_CMD_G_RXSENSOR: 430 case MCE_RSP_PULSE_COUNT:
412 datasize = 2; 431 datasize = 2;
413 break; 432 break;
433 case MCE_CMD_SIG_END:
414 case MCE_CMD_S_TXMASK: 434 case MCE_CMD_S_TXMASK:
415 case MCE_CMD_S_RXSENSOR: 435 case MCE_CMD_S_RXSENSOR:
416 datasize = 1; 436 datasize = 1;
@@ -433,7 +453,7 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
433 return; 453 return;
434 454
435 /* skip meaningless 0xb1 0x60 header bytes on orig receiver */ 455 /* skip meaningless 0xb1 0x60 header bytes on orig receiver */
436 if (ir->flags.microsoft_gen1 && !out) 456 if (ir->flags.microsoft_gen1 && !out && !offset)
437 skip = 2; 457 skip = 2;
438 458
439 if (len <= skip) 459 if (len <= skip)
@@ -491,6 +511,9 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
491 break; 511 break;
492 case MCE_COMMAND_HEADER: 512 case MCE_COMMAND_HEADER:
493 switch (subcmd) { 513 switch (subcmd) {
514 case MCE_CMD_SIG_END:
515 dev_info(dev, "End of signal\n");
516 break;
494 case MCE_CMD_PING: 517 case MCE_CMD_PING:
495 dev_info(dev, "Ping\n"); 518 dev_info(dev, "Ping\n");
496 break; 519 break;
@@ -525,10 +548,11 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
525 inout, data1 == 0x02 ? "short" : "long"); 548 inout, data1 == 0x02 ? "short" : "long");
526 break; 549 break;
527 case MCE_CMD_G_RXSENSOR: 550 case MCE_CMD_G_RXSENSOR:
528 if (len == 2) 551 /* aka MCE_RSP_PULSE_COUNT */
552 if (out)
529 dev_info(dev, "Get receive sensor\n"); 553 dev_info(dev, "Get receive sensor\n");
530 else 554 else if (ir->learning_enabled)
531 dev_info(dev, "Received pulse count is %d\n", 555 dev_info(dev, "RX pulse count: %d\n",
532 ((data1 << 8) | data2)); 556 ((data1 << 8) | data2));
533 break; 557 break;
534 case MCE_RSP_CMD_INVALID: 558 case MCE_RSP_CMD_INVALID:
@@ -724,16 +748,16 @@ out:
724 return ret ? ret : n; 748 return ret ? ret : n;
725} 749}
726 750
727/* Sets active IR outputs -- mce devices typically (all?) have two */ 751/* Sets active IR outputs -- mce devices typically have two */
728static int mceusb_set_tx_mask(void *priv, u32 mask) 752static int mceusb_set_tx_mask(void *priv, u32 mask)
729{ 753{
730 struct mceusb_dev *ir = priv; 754 struct mceusb_dev *ir = priv;
731 755
732 if (ir->flags.tx_mask_inverted) 756 if (ir->flags.tx_mask_normal)
757 ir->tx_mask = mask;
758 else
733 ir->tx_mask = (mask != MCE_DEFAULT_TX_MASK ? 759 ir->tx_mask = (mask != MCE_DEFAULT_TX_MASK ?
734 mask ^ MCE_DEFAULT_TX_MASK : mask) << 1; 760 mask ^ MCE_DEFAULT_TX_MASK : mask) << 1;
735 else
736 ir->tx_mask = mask;
737 761
738 return 0; 762 return 0;
739} 763}
@@ -752,7 +776,7 @@ static int mceusb_set_tx_carrier(void *priv, u32 carrier)
752 776
753 if (carrier == 0) { 777 if (carrier == 0) {
754 ir->carrier = carrier; 778 ir->carrier = carrier;
755 cmdbuf[2] = 0x01; 779 cmdbuf[2] = MCE_CMD_SIG_END;
756 cmdbuf[3] = MCE_IRDATA_TRAILER; 780 cmdbuf[3] = MCE_IRDATA_TRAILER;
757 dev_dbg(ir->dev, "%s: disabling carrier " 781 dev_dbg(ir->dev, "%s: disabling carrier "
758 "modulation\n", __func__); 782 "modulation\n", __func__);
@@ -782,6 +806,34 @@ static int mceusb_set_tx_carrier(void *priv, u32 carrier)
782 return carrier; 806 return carrier;
783} 807}
784 808
809/*
810 * We don't do anything but print debug spew for many of the command bits
811 * we receive from the hardware, but some of them are useful information
812 * we want to store so that we can use them.
813 */
814static void mceusb_handle_command(struct mceusb_dev *ir, int index)
815{
816 u8 hi = ir->buf_in[index + 1] & 0xff;
817 u8 lo = ir->buf_in[index + 2] & 0xff;
818
819 switch (ir->buf_in[index]) {
820 /* 2-byte return value commands */
821 case MCE_CMD_S_TIMEOUT:
822 ir->props->timeout = MS_TO_NS((hi << 8 | lo) / 2);
823 break;
824
825 /* 1-byte return value commands */
826 case MCE_CMD_S_TXMASK:
827 ir->tx_mask = hi;
828 break;
829 case MCE_CMD_S_RXSENSOR:
830 ir->learning_enabled = (hi == 0x02);
831 break;
832 default:
833 break;
834 }
835}
836
785static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len) 837static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
786{ 838{
787 DEFINE_IR_RAW_EVENT(rawir); 839 DEFINE_IR_RAW_EVENT(rawir);
@@ -791,39 +843,30 @@ static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
791 if (ir->flags.microsoft_gen1) 843 if (ir->flags.microsoft_gen1)
792 i = 2; 844 i = 2;
793 845
846 /* if there's no data, just return now */
847 if (buf_len <= i)
848 return;
849
794 for (; i < buf_len; i++) { 850 for (; i < buf_len; i++) {
795 switch (ir->parser_state) { 851 switch (ir->parser_state) {
796 case SUBCMD: 852 case SUBCMD:
797 ir->rem = mceusb_cmdsize(ir->cmd, ir->buf_in[i]); 853 ir->rem = mceusb_cmdsize(ir->cmd, ir->buf_in[i]);
798 mceusb_dev_printdata(ir, ir->buf_in, i - 1, 854 mceusb_dev_printdata(ir, ir->buf_in, i - 1,
799 ir->rem + 2, false); 855 ir->rem + 2, false);
856 mceusb_handle_command(ir, i);
800 ir->parser_state = CMD_DATA; 857 ir->parser_state = CMD_DATA;
801 break; 858 break;
802 case PARSE_IRDATA: 859 case PARSE_IRDATA:
803 ir->rem--; 860 ir->rem--;
804 rawir.pulse = ((ir->buf_in[i] & MCE_PULSE_BIT) != 0); 861 rawir.pulse = ((ir->buf_in[i] & MCE_PULSE_BIT) != 0);
805 rawir.duration = (ir->buf_in[i] & MCE_PULSE_MASK) 862 rawir.duration = (ir->buf_in[i] & MCE_PULSE_MASK)
806 * MCE_TIME_UNIT * 1000; 863 * MS_TO_NS(MCE_TIME_UNIT);
807
808 if ((ir->buf_in[i] & MCE_PULSE_MASK) == 0x7f) {
809 if (ir->rawir.pulse == rawir.pulse) {
810 ir->rawir.duration += rawir.duration;
811 } else {
812 ir->rawir.duration = rawir.duration;
813 ir->rawir.pulse = rawir.pulse;
814 }
815 if (ir->rem)
816 break;
817 }
818 rawir.duration += ir->rawir.duration;
819 ir->rawir.duration = 0;
820 ir->rawir.pulse = rawir.pulse;
821 864
822 dev_dbg(ir->dev, "Storing %s with duration %d\n", 865 dev_dbg(ir->dev, "Storing %s with duration %d\n",
823 rawir.pulse ? "pulse" : "space", 866 rawir.pulse ? "pulse" : "space",
824 rawir.duration); 867 rawir.duration);
825 868
826 ir_raw_event_store(ir->idev, &rawir); 869 ir_raw_event_store_with_filter(ir->idev, &rawir);
827 break; 870 break;
828 case CMD_DATA: 871 case CMD_DATA:
829 ir->rem--; 872 ir->rem--;
@@ -839,17 +882,10 @@ static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
839 continue; 882 continue;
840 } 883 }
841 ir->rem = (ir->cmd & MCE_PACKET_LENGTH_MASK); 884 ir->rem = (ir->cmd & MCE_PACKET_LENGTH_MASK);
842 mceusb_dev_printdata(ir, ir->buf_in, i, ir->rem + 1, false); 885 mceusb_dev_printdata(ir, ir->buf_in,
843 if (ir->rem) { 886 i, ir->rem + 1, false);
887 if (ir->rem)
844 ir->parser_state = PARSE_IRDATA; 888 ir->parser_state = PARSE_IRDATA;
845 break;
846 }
847 /*
848 * a package with len=0 (e. g. 0x80) means end of
849 * data. We could use it to do the call to
850 * ir_raw_event_handle(). For now, we don't need to
851 * use it.
852 */
853 break; 889 break;
854 } 890 }
855 891
@@ -984,9 +1020,11 @@ static void mceusb_get_parameters(struct mceusb_dev *ir)
984 mce_async_out(ir, GET_CARRIER_FREQ, sizeof(GET_CARRIER_FREQ)); 1020 mce_async_out(ir, GET_CARRIER_FREQ, sizeof(GET_CARRIER_FREQ));
985 mce_sync_in(ir, NULL, maxp); 1021 mce_sync_in(ir, NULL, maxp);
986 1022
987 /* get the transmitter bitmask */ 1023 if (!ir->flags.no_tx) {
988 mce_async_out(ir, GET_TX_BITMASK, sizeof(GET_TX_BITMASK)); 1024 /* get the transmitter bitmask */
989 mce_sync_in(ir, NULL, maxp); 1025 mce_async_out(ir, GET_TX_BITMASK, sizeof(GET_TX_BITMASK));
1026 mce_sync_in(ir, NULL, maxp);
1027 }
990 1028
991 /* get receiver timeout value */ 1029 /* get receiver timeout value */
992 mce_async_out(ir, GET_RX_TIMEOUT, sizeof(GET_RX_TIMEOUT)); 1030 mce_async_out(ir, GET_RX_TIMEOUT, sizeof(GET_RX_TIMEOUT));
@@ -1035,12 +1073,18 @@ static struct input_dev *mceusb_init_input_dev(struct mceusb_dev *ir)
1035 props->priv = ir; 1073 props->priv = ir;
1036 props->driver_type = RC_DRIVER_IR_RAW; 1074 props->driver_type = RC_DRIVER_IR_RAW;
1037 props->allowed_protos = IR_TYPE_ALL; 1075 props->allowed_protos = IR_TYPE_ALL;
1038 props->s_tx_mask = mceusb_set_tx_mask; 1076 props->timeout = MS_TO_NS(1000);
1039 props->s_tx_carrier = mceusb_set_tx_carrier; 1077 if (!ir->flags.no_tx) {
1040 props->tx_ir = mceusb_tx_ir; 1078 props->s_tx_mask = mceusb_set_tx_mask;
1079 props->s_tx_carrier = mceusb_set_tx_carrier;
1080 props->tx_ir = mceusb_tx_ir;
1081 }
1041 1082
1042 ir->props = props; 1083 ir->props = props;
1043 1084
1085 usb_to_input_id(ir->usbdev, &idev->id);
1086 idev->dev.parent = ir->dev;
1087
1044 if (mceusb_model[ir->model].rc_map) 1088 if (mceusb_model[ir->model].rc_map)
1045 rc_map = mceusb_model[ir->model].rc_map; 1089 rc_map = mceusb_model[ir->model].rc_map;
1046 1090
@@ -1074,16 +1118,16 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1074 enum mceusb_model_type model = id->driver_info; 1118 enum mceusb_model_type model = id->driver_info;
1075 bool is_gen3; 1119 bool is_gen3;
1076 bool is_microsoft_gen1; 1120 bool is_microsoft_gen1;
1077 bool tx_mask_inverted; 1121 bool tx_mask_normal;
1078 bool is_polaris; 1122 bool is_polaris;
1079 1123
1080 dev_dbg(&intf->dev, ": %s called\n", __func__); 1124 dev_dbg(&intf->dev, "%s called\n", __func__);
1081 1125
1082 idesc = intf->cur_altsetting; 1126 idesc = intf->cur_altsetting;
1083 1127
1084 is_gen3 = mceusb_model[model].mce_gen3; 1128 is_gen3 = mceusb_model[model].mce_gen3;
1085 is_microsoft_gen1 = mceusb_model[model].mce_gen1; 1129 is_microsoft_gen1 = mceusb_model[model].mce_gen1;
1086 tx_mask_inverted = mceusb_model[model].tx_mask_inverted; 1130 tx_mask_normal = mceusb_model[model].tx_mask_normal;
1087 is_polaris = mceusb_model[model].is_polaris; 1131 is_polaris = mceusb_model[model].is_polaris;
1088 1132
1089 if (is_polaris) { 1133 if (is_polaris) {
@@ -1107,7 +1151,7 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1107 ep_in = ep; 1151 ep_in = ep;
1108 ep_in->bmAttributes = USB_ENDPOINT_XFER_INT; 1152 ep_in->bmAttributes = USB_ENDPOINT_XFER_INT;
1109 ep_in->bInterval = 1; 1153 ep_in->bInterval = 1;
1110 dev_dbg(&intf->dev, ": acceptable inbound endpoint " 1154 dev_dbg(&intf->dev, "acceptable inbound endpoint "
1111 "found\n"); 1155 "found\n");
1112 } 1156 }
1113 1157
@@ -1122,12 +1166,12 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1122 ep_out = ep; 1166 ep_out = ep;
1123 ep_out->bmAttributes = USB_ENDPOINT_XFER_INT; 1167 ep_out->bmAttributes = USB_ENDPOINT_XFER_INT;
1124 ep_out->bInterval = 1; 1168 ep_out->bInterval = 1;
1125 dev_dbg(&intf->dev, ": acceptable outbound endpoint " 1169 dev_dbg(&intf->dev, "acceptable outbound endpoint "
1126 "found\n"); 1170 "found\n");
1127 } 1171 }
1128 } 1172 }
1129 if (ep_in == NULL) { 1173 if (ep_in == NULL) {
1130 dev_dbg(&intf->dev, ": inbound and/or endpoint not found\n"); 1174 dev_dbg(&intf->dev, "inbound and/or endpoint not found\n");
1131 return -ENODEV; 1175 return -ENODEV;
1132 } 1176 }
1133 1177
@@ -1150,11 +1194,10 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1150 ir->dev = &intf->dev; 1194 ir->dev = &intf->dev;
1151 ir->len_in = maxp; 1195 ir->len_in = maxp;
1152 ir->flags.microsoft_gen1 = is_microsoft_gen1; 1196 ir->flags.microsoft_gen1 = is_microsoft_gen1;
1153 ir->flags.tx_mask_inverted = tx_mask_inverted; 1197 ir->flags.tx_mask_normal = tx_mask_normal;
1198 ir->flags.no_tx = mceusb_model[model].no_tx;
1154 ir->model = model; 1199 ir->model = model;
1155 1200
1156 init_ir_raw_event(&ir->rawir);
1157
1158 /* Saving usb interface data for use by the transmitter routine */ 1201 /* Saving usb interface data for use by the transmitter routine */
1159 ir->usb_ep_in = ep_in; 1202 ir->usb_ep_in = ep_in;
1160 ir->usb_ep_out = ep_out; 1203 ir->usb_ep_out = ep_out;
@@ -1191,7 +1234,8 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1191 1234
1192 mceusb_get_parameters(ir); 1235 mceusb_get_parameters(ir);
1193 1236
1194 mceusb_set_tx_mask(ir, MCE_DEFAULT_TX_MASK); 1237 if (!ir->flags.no_tx)
1238 mceusb_set_tx_mask(ir, MCE_DEFAULT_TX_MASK);
1195 1239
1196 usb_set_intfdata(intf, ir); 1240 usb_set_intfdata(intf, ir);
1197 1241
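
One unit subtlety in the mceusb hunks above: ir-core durations are in nanoseconds and MCE hardware counts in 50 µs ticks, so the MS_TO_NS() helper (a plain ×1000) is effectively a microseconds-to-nanoseconds step in the duration math despite its name. A hedged sketch of the decode arithmetic, using the constants as the patch defines them:

#define MCE_TIME_UNIT	50		/* one MCE tick is ~50 us */
#define MCE_PULSE_MASK	0x7f		/* low bits carry the tick count */
#define MS_TO_NS(msec)	((msec) * 1000)	/* as defined in the patch */

/* Illustrative only: one MCE IR byte becomes a duration in ns. */
static unsigned int example_decode_duration(unsigned char b)
{
	/* ticks * 50000 ns, i.e. 50 us per tick */
	return (b & MCE_PULSE_MASK) * MS_TO_NS(MCE_TIME_UNIT);
}
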
diff --git a/drivers/media/IR/nuvoton-cir.c b/drivers/media/IR/nuvoton-cir.c
index 301be53aee85..acc729c79cec 100644
--- a/drivers/media/IR/nuvoton-cir.c
+++ b/drivers/media/IR/nuvoton-cir.c
@@ -603,6 +603,8 @@ static void nvt_process_rx_ir_data(struct nvt_dev *nvt)
603 count = nvt->pkts; 603 count = nvt->pkts;
604 nvt_dbg_verbose("Processing buffer of len %d", count); 604 nvt_dbg_verbose("Processing buffer of len %d", count);
605 605
606 init_ir_raw_event(&rawir);
607
606 for (i = 0; i < count; i++) { 608 for (i = 0; i < count; i++) {
607 nvt->pkts--; 609 nvt->pkts--;
608 sample = nvt->buf[i]; 610 sample = nvt->buf[i];
@@ -643,11 +645,15 @@ static void nvt_process_rx_ir_data(struct nvt_dev *nvt)
643 * indicates end of IR signal, but new data incoming. In both 645 * indicates end of IR signal, but new data incoming. In both
644 * cases, it means we're ready to call ir_raw_event_handle 646 * cases, it means we're ready to call ir_raw_event_handle
645 */ 647 */
646 if (sample == BUF_PULSE_BIT || ((sample != BUF_LEN_MASK) && 648 if ((sample == BUF_PULSE_BIT) && nvt->pkts) {
647 (sample & BUF_REPEAT_MASK) == BUF_REPEAT_BYTE)) 649 nvt_dbg("Calling ir_raw_event_handle (signal end)\n");
648 ir_raw_event_handle(nvt->rdev); 650 ir_raw_event_handle(nvt->rdev);
651 }
649 } 652 }
650 653
654 nvt_dbg("Calling ir_raw_event_handle (buffer empty)\n");
655 ir_raw_event_handle(nvt->rdev);
656
651 if (nvt->pkts) { 657 if (nvt->pkts) {
 652 nvt_dbg("Odd, pkts should be 0 now... (it's %u)", nvt->pkts); 658
653 nvt->pkts = 0; 659 nvt->pkts = 0;
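
The rework above changes when the raw-IR decoder gets kicked: ir_raw_event_handle() now runs for a signal-end marker only when more data is still queued, and once unconditionally after the buffer drains, instead of keying off repeat bytes. A hedged sketch of that delivery rule — BUF_PULSE_BIT's value and the store step are stand-ins, and the ir-core of this era takes an input_dev:

#include <linux/input.h>
#include <linux/types.h>
#include <media/ir-core.h>

#define BUF_PULSE_BIT	0x80	/* stand-in for the nvt marker value */

static void example_drain(struct input_dev *idev, const u8 *buf, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		/* ... ir_raw_event_store() one decoded sample here ... */

		if (buf[i] == BUF_PULSE_BIT && i + 1 < count)
			ir_raw_event_handle(idev); /* signal done, more queued */
	}

	ir_raw_event_handle(idev);	/* buffer fully drained */
}
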
diff --git a/drivers/media/IR/streamzap.c b/drivers/media/IR/streamzap.c
index 548381c35bfd..3a20aef67d08 100644
--- a/drivers/media/IR/streamzap.c
+++ b/drivers/media/IR/streamzap.c
@@ -34,8 +34,9 @@
34#include <linux/device.h> 34#include <linux/device.h>
35#include <linux/module.h> 35#include <linux/module.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <linux/usb.h>
38#include <linux/input.h> 37#include <linux/input.h>
38#include <linux/usb.h>
39#include <linux/usb/input.h>
39#include <media/ir-core.h> 40#include <media/ir-core.h>
40 41
41#define DRIVER_VERSION "1.61" 42#define DRIVER_VERSION "1.61"
@@ -140,7 +141,9 @@ static struct usb_driver streamzap_driver = {
140 141
141static void sz_push(struct streamzap_ir *sz, struct ir_raw_event rawir) 142static void sz_push(struct streamzap_ir *sz, struct ir_raw_event rawir)
142{ 143{
143 ir_raw_event_store(sz->idev, &rawir); 144 dev_dbg(sz->dev, "Storing %s with duration %u us\n",
145 (rawir.pulse ? "pulse" : "space"), rawir.duration);
146 ir_raw_event_store_with_filter(sz->idev, &rawir);
144} 147}
145 148
146static void sz_push_full_pulse(struct streamzap_ir *sz, 149static void sz_push_full_pulse(struct streamzap_ir *sz,
@@ -167,7 +170,6 @@ static void sz_push_full_pulse(struct streamzap_ir *sz,
167 rawir.duration *= 1000; 170 rawir.duration *= 1000;
168 rawir.duration &= IR_MAX_DURATION; 171 rawir.duration &= IR_MAX_DURATION;
169 } 172 }
170 dev_dbg(sz->dev, "ls %u\n", rawir.duration);
171 sz_push(sz, rawir); 173 sz_push(sz, rawir);
172 174
173 sz->idle = false; 175 sz->idle = false;
@@ -180,7 +182,6 @@ static void sz_push_full_pulse(struct streamzap_ir *sz,
180 sz->sum += rawir.duration; 182 sz->sum += rawir.duration;
181 rawir.duration *= 1000; 183 rawir.duration *= 1000;
182 rawir.duration &= IR_MAX_DURATION; 184 rawir.duration &= IR_MAX_DURATION;
183 dev_dbg(sz->dev, "p %u\n", rawir.duration);
184 sz_push(sz, rawir); 185 sz_push(sz, rawir);
185} 186}
186 187
@@ -200,7 +201,6 @@ static void sz_push_full_space(struct streamzap_ir *sz,
200 rawir.duration += SZ_RESOLUTION / 2; 201 rawir.duration += SZ_RESOLUTION / 2;
201 sz->sum += rawir.duration; 202 sz->sum += rawir.duration;
202 rawir.duration *= 1000; 203 rawir.duration *= 1000;
203 dev_dbg(sz->dev, "s %u\n", rawir.duration);
204 sz_push(sz, rawir); 204 sz_push(sz, rawir);
205} 205}
206 206
@@ -221,8 +221,6 @@ static void streamzap_callback(struct urb *urb)
221 struct streamzap_ir *sz; 221 struct streamzap_ir *sz;
222 unsigned int i; 222 unsigned int i;
223 int len; 223 int len;
224 static int timeout = (((SZ_TIMEOUT * SZ_RESOLUTION * 1000) &
225 IR_MAX_DURATION) | 0x03000000);
226 224
227 if (!urb) 225 if (!urb)
228 return; 226 return;
@@ -246,7 +244,7 @@ static void streamzap_callback(struct urb *urb)
246 244
247 dev_dbg(sz->dev, "%s: received urb, len %d\n", __func__, len); 245 dev_dbg(sz->dev, "%s: received urb, len %d\n", __func__, len);
248 for (i = 0; i < len; i++) { 246 for (i = 0; i < len; i++) {
249 dev_dbg(sz->dev, "sz idx %d: %x\n", 247 dev_dbg(sz->dev, "sz->buf_in[%d]: %x\n",
250 i, (unsigned char)sz->buf_in[i]); 248 i, (unsigned char)sz->buf_in[i]);
251 switch (sz->decoder_state) { 249 switch (sz->decoder_state) {
252 case PulseSpace: 250 case PulseSpace:
@@ -273,7 +271,7 @@ static void streamzap_callback(struct urb *urb)
273 DEFINE_IR_RAW_EVENT(rawir); 271 DEFINE_IR_RAW_EVENT(rawir);
274 272
275 rawir.pulse = false; 273 rawir.pulse = false;
276 rawir.duration = timeout; 274 rawir.duration = sz->props->timeout;
277 sz->idle = true; 275 sz->idle = true;
278 if (sz->timeout_enabled) 276 if (sz->timeout_enabled)
279 sz_push(sz, rawir); 277 sz_push(sz, rawir);
@@ -335,6 +333,9 @@ static struct input_dev *streamzap_init_input_dev(struct streamzap_ir *sz)
335 333
336 sz->props = props; 334 sz->props = props;
337 335
336 usb_to_input_id(sz->usbdev, &idev->id);
337 idev->dev.parent = sz->dev;
338
338 ret = ir_input_register(idev, RC_MAP_STREAMZAP, props, DRIVER_NAME); 339 ret = ir_input_register(idev, RC_MAP_STREAMZAP, props, DRIVER_NAME);
339 if (ret < 0) { 340 if (ret < 0) {
340 dev_err(dev, "remote input device register failed\n"); 341 dev_err(dev, "remote input device register failed\n");
@@ -444,6 +445,8 @@ static int __devinit streamzap_probe(struct usb_interface *intf,
444 sz->decoder_state = PulseSpace; 445 sz->decoder_state = PulseSpace;
445 /* FIXME: don't yet have a way to set this */ 446 /* FIXME: don't yet have a way to set this */
446 sz->timeout_enabled = true; 447 sz->timeout_enabled = true;
448 sz->props->timeout = (((SZ_TIMEOUT * SZ_RESOLUTION * 1000) &
449 IR_MAX_DURATION) | 0x03000000);
447 #if 0 450 #if 0
448 /* not yet supported, depends on patches from maxim */ 451 /* not yet supported, depends on patches from maxim */
449 /* see also: LIRC_GET_REC_RESOLUTION and LIRC_SET_REC_TIMEOUT */ 452 /* see also: LIRC_GET_REC_RESOLUTION and LIRC_SET_REC_TIMEOUT */
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index dfb198d0415b..f16461844c5c 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -1989,8 +1989,23 @@ static int cx25840_probe(struct i2c_client *client,
1989 v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, 1989 v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops,
1990 V4L2_CID_HUE, -128, 127, 1, 0); 1990 V4L2_CID_HUE, -128, 127, 1, 0);
1991 if (!is_cx2583x(state)) { 1991 if (!is_cx2583x(state)) {
1992 default_volume = 228 - cx25840_read(client, 0x8d4); 1992 default_volume = cx25840_read(client, 0x8d4);
1993 default_volume = ((default_volume / 2) + 23) << 9; 1993 /*
1994 * Enforce the legacy PVR-350/MSP3400 to PVR-150/CX25843 volume
1995 * scale mapping limits to avoid -ERANGE errors when
1996 * initializing the volume control
1997 */
1998 if (default_volume > 228) {
1999 /* Bottom out at -96 dB, v4l2 vol range 0x2e00-0x2fff */
2000 default_volume = 228;
2001 cx25840_write(client, 0x8d4, 228);
2002 }
2003 else if (default_volume < 20) {
2004 /* Top out at + 8 dB, v4l2 vol range 0xfe00-0xffff */
2005 default_volume = 20;
2006 cx25840_write(client, 0x8d4, 20);
2007 }
2008 default_volume = (((228 - default_volume) >> 1) + 23) << 9;
1994 2009
1995 state->volume = v4l2_ctrl_new_std(&state->hdl, 2010 state->volume = v4l2_ctrl_new_std(&state->hdl,
1996 &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_VOLUME, 2011 &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_VOLUME,
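
The clamp introduced above is easiest to sanity-check by plugging the two limits into the register-to-V4L2 mapping; the results land exactly on the ranges named in the comments. A small worked sketch of that arithmetic:

/*
 * v4l2 = (((228 - reg) >> 1) + 23) << 9
 *   reg = 228 (bottom, -96 dB): (0 + 23) << 9          = 0x2e00
 *   reg =  20 (top,    +8 dB): ((208 >> 1) + 23) << 9 = 127 << 9 = 0xfe00
 * so clamping reg to [20, 228] pins the control to [0x2e00, 0xfe00].
 */
static int example_reg_to_v4l2_volume(int reg)
{
	if (reg > 228)
		reg = 228;
	else if (reg < 20)
		reg = 20;

	return (((228 - reg) >> 1) + 23) << 9;
}
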
diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c
index 4aaa47c0eabf..54b7fcd469a8 100644
--- a/drivers/media/video/cx88/cx88-alsa.c
+++ b/drivers/media/video/cx88/cx88-alsa.c
@@ -40,7 +40,6 @@
40#include <sound/control.h> 40#include <sound/control.h>
41#include <sound/initval.h> 41#include <sound/initval.h>
42#include <sound/tlv.h> 42#include <sound/tlv.h>
43#include <media/wm8775.h>
44 43
45#include "cx88.h" 44#include "cx88.h"
46#include "cx88-reg.h" 45#include "cx88-reg.h"
@@ -587,47 +586,26 @@ static int snd_cx88_volume_put(struct snd_kcontrol *kcontrol,
587 int left, right, v, b; 586 int left, right, v, b;
588 int changed = 0; 587 int changed = 0;
589 u32 old; 588 u32 old;
590 struct v4l2_control client_ctl;
591
592 /* Pass volume & balance onto any WM8775 */
593 if (value->value.integer.value[0] >= value->value.integer.value[1]) {
594 v = value->value.integer.value[0] << 10;
595 b = value->value.integer.value[0] ?
596 (0x8000 * value->value.integer.value[1]) / value->value.integer.value[0] :
597 0x8000;
598 } else {
599 v = value->value.integer.value[1] << 10;
600 b = value->value.integer.value[1] ?
601 0xffff - (0x8000 * value->value.integer.value[0]) / value->value.integer.value[1] :
602 0x8000;
603 }
604 client_ctl.value = v;
605 client_ctl.id = V4L2_CID_AUDIO_VOLUME;
606 call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
607
608 client_ctl.value = b;
609 client_ctl.id = V4L2_CID_AUDIO_BALANCE;
610 call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
611 589
612 left = value->value.integer.value[0] & 0x3f; 590 left = value->value.integer.value[0] & 0x3f;
613 right = value->value.integer.value[1] & 0x3f; 591 right = value->value.integer.value[1] & 0x3f;
614 b = right - left; 592 b = right - left;
615 if (b < 0) { 593 if (b < 0) {
616 v = 0x3f - left; 594 v = 0x3f - left;
617 b = (-b) | 0x40; 595 b = (-b) | 0x40;
618 } else { 596 } else {
619 v = 0x3f - right; 597 v = 0x3f - right;
620 } 598 }
621 /* Do we really know this will always be called with IRQs on? */ 599 /* Do we really know this will always be called with IRQs on? */
622 spin_lock_irq(&chip->reg_lock); 600 spin_lock_irq(&chip->reg_lock);
623 old = cx_read(AUD_VOL_CTL); 601 old = cx_read(AUD_VOL_CTL);
624 if (v != (old & 0x3f)) { 602 if (v != (old & 0x3f)) {
625 cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, (old & ~0x3f) | v); 603 cx_write(AUD_VOL_CTL, (old & ~0x3f) | v);
626 changed = 1; 604 changed = 1;
627 } 605 }
628 if ((cx_read(AUD_BAL_CTL) & 0x7f) != b) { 606 if (cx_read(AUD_BAL_CTL) != b) {
629 cx_write(AUD_BAL_CTL, b); 607 cx_write(AUD_BAL_CTL, b);
630 changed = 1; 608 changed = 1;
631 } 609 }
632 spin_unlock_irq(&chip->reg_lock); 610 spin_unlock_irq(&chip->reg_lock);
633 611
@@ -640,7 +618,7 @@ static const struct snd_kcontrol_new snd_cx88_volume = {
640 .iface = SNDRV_CTL_ELEM_IFACE_MIXER, 618 .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
641 .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | 619 .access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
642 SNDRV_CTL_ELEM_ACCESS_TLV_READ, 620 SNDRV_CTL_ELEM_ACCESS_TLV_READ,
643 .name = "Analog-TV Volume", 621 .name = "Playback Volume",
644 .info = snd_cx88_volume_info, 622 .info = snd_cx88_volume_info,
645 .get = snd_cx88_volume_get, 623 .get = snd_cx88_volume_get,
646 .put = snd_cx88_volume_put, 624 .put = snd_cx88_volume_put,
@@ -671,14 +649,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
671 vol = cx_read(AUD_VOL_CTL); 649 vol = cx_read(AUD_VOL_CTL);
672 if (value->value.integer.value[0] != !(vol & bit)) { 650 if (value->value.integer.value[0] != !(vol & bit)) {
673 vol ^= bit; 651 vol ^= bit;
674 cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, vol); 652 cx_write(AUD_VOL_CTL, vol);
675 /* Pass mute onto any WM8775 */
676 if ((1<<6) == bit) {
677 struct v4l2_control client_ctl;
678 client_ctl.value = 0 != (vol & bit);
679 client_ctl.id = V4L2_CID_AUDIO_MUTE;
680 call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
681 }
682 ret = 1; 653 ret = 1;
683 } 654 }
684 spin_unlock_irq(&chip->reg_lock); 655 spin_unlock_irq(&chip->reg_lock);
@@ -687,7 +658,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
687 658
688static const struct snd_kcontrol_new snd_cx88_dac_switch = { 659static const struct snd_kcontrol_new snd_cx88_dac_switch = {
689 .iface = SNDRV_CTL_ELEM_IFACE_MIXER, 660 .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
690 .name = "Audio-Out Switch", 661 .name = "Playback Switch",
691 .info = snd_ctl_boolean_mono_info, 662 .info = snd_ctl_boolean_mono_info,
692 .get = snd_cx88_switch_get, 663 .get = snd_cx88_switch_get,
693 .put = snd_cx88_switch_put, 664 .put = snd_cx88_switch_put,
@@ -696,49 +667,13 @@ static const struct snd_kcontrol_new snd_cx88_dac_switch = {
696 667
697static const struct snd_kcontrol_new snd_cx88_source_switch = { 668static const struct snd_kcontrol_new snd_cx88_source_switch = {
698 .iface = SNDRV_CTL_ELEM_IFACE_MIXER, 669 .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
699 .name = "Analog-TV Switch", 670 .name = "Capture Switch",
700 .info = snd_ctl_boolean_mono_info, 671 .info = snd_ctl_boolean_mono_info,
701 .get = snd_cx88_switch_get, 672 .get = snd_cx88_switch_get,
702 .put = snd_cx88_switch_put, 673 .put = snd_cx88_switch_put,
703 .private_value = (1<<6), 674 .private_value = (1<<6),
704}; 675};
705 676
706static int snd_cx88_alc_get(struct snd_kcontrol *kcontrol,
707 struct snd_ctl_elem_value *value)
708{
709 snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
710 struct cx88_core *core = chip->core;
711 struct v4l2_control client_ctl;
712
713 client_ctl.id = V4L2_CID_AUDIO_LOUDNESS;
714 call_hw(core, WM8775_GID, core, g_ctrl, &client_ctl);
715 value->value.integer.value[0] = client_ctl.value ? 1 : 0;
716
717 return 0;
718}
719
720static int snd_cx88_alc_put(struct snd_kcontrol *kcontrol,
721 struct snd_ctl_elem_value *value)
722{
723 snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
724 struct cx88_core *core = chip->core;
725 struct v4l2_control client_ctl;
726
727 client_ctl.value = 0 != value->value.integer.value[0];
728 client_ctl.id = V4L2_CID_AUDIO_LOUDNESS;
729 call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
730
731 return 0;
732}
733
734static struct snd_kcontrol_new snd_cx88_alc_switch = {
735 .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
736 .name = "Line-In ALC Switch",
737 .info = snd_ctl_boolean_mono_info,
738 .get = snd_cx88_alc_get,
739 .put = snd_cx88_alc_put,
740};
741
742/**************************************************************************** 677/****************************************************************************
743 Basic Flow for Sound Devices 678 Basic Flow for Sound Devices
744 ****************************************************************************/ 679 ****************************************************************************/
@@ -860,7 +795,6 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
860{ 795{
861 struct snd_card *card; 796 struct snd_card *card;
862 snd_cx88_card_t *chip; 797 snd_cx88_card_t *chip;
863 struct v4l2_subdev *sd;
864 int err; 798 int err;
865 799
866 if (devno >= SNDRV_CARDS) 800 if (devno >= SNDRV_CARDS)
@@ -896,15 +830,6 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
896 if (err < 0) 830 if (err < 0)
897 goto error; 831 goto error;
898 832
899 /* If there's a wm8775 then add a Line-In ALC switch */
900 list_for_each_entry(sd, &chip->core->v4l2_dev.subdevs, list) {
901 if (WM8775_GID == sd->grp_id) {
902 snd_ctl_add(card, snd_ctl_new1(&snd_cx88_alc_switch,
903 chip));
904 break;
905 }
906 }
907
908 strcpy (card->driver, "CX88x"); 833 strcpy (card->driver, "CX88x");
909 sprintf(card->shortname, "Conexant CX%x", pci->device); 834 sprintf(card->shortname, "Conexant CX%x", pci->device);
910 sprintf(card->longname, "%s at %#llx", 835 sprintf(card->longname, "%s at %#llx",
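
Beyond ripping out the WM8775 plumbing, the renames above move the cx88 mixer to the canonical ALSA control names ("Playback Volume", "Playback Switch", "Capture Switch") that alsa-lib's mixer abstraction and softvol matching key on. A hedged sketch of a control declared with a standard name, reusing the handlers from this file:

static const struct snd_kcontrol_new example_volume = {
	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
	.name  = "Playback Volume",	/* canonical, unlike "Analog-TV Volume" */
	.info  = snd_cx88_volume_info,
	.get   = snd_cx88_volume_get,
	.put   = snd_cx88_volume_put,
};
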
diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c
index 9b9e169cce90..0ccc2afd7266 100644
--- a/drivers/media/video/cx88/cx88-cards.c
+++ b/drivers/media/video/cx88/cx88-cards.c
@@ -1007,15 +1007,22 @@ static const struct cx88_board cx88_boards[] = {
1007 .radio_type = UNSET, 1007 .radio_type = UNSET,
1008 .tuner_addr = ADDR_UNSET, 1008 .tuner_addr = ADDR_UNSET,
1009 .radio_addr = ADDR_UNSET, 1009 .radio_addr = ADDR_UNSET,
1010 .audio_chip = V4L2_IDENT_WM8775,
1010 .input = {{ 1011 .input = {{
1011 .type = CX88_VMUX_DVB, 1012 .type = CX88_VMUX_DVB,
1012 .vmux = 0, 1013 .vmux = 0,
1014 /* 2: Line-In */
1015 .audioroute = 2,
1013 },{ 1016 },{
1014 .type = CX88_VMUX_COMPOSITE1, 1017 .type = CX88_VMUX_COMPOSITE1,
1015 .vmux = 1, 1018 .vmux = 1,
1019 /* 2: Line-In */
1020 .audioroute = 2,
1016 },{ 1021 },{
1017 .type = CX88_VMUX_SVIDEO, 1022 .type = CX88_VMUX_SVIDEO,
1018 .vmux = 2, 1023 .vmux = 2,
1024 /* 2: Line-In */
1025 .audioroute = 2,
1019 }}, 1026 }},
1020 .mpeg = CX88_MPEG_DVB, 1027 .mpeg = CX88_MPEG_DVB,
1021 }, 1028 },
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index 62cea9549404..d9249e5a04c9 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -40,7 +40,6 @@
40#include "cx88.h" 40#include "cx88.h"
41#include <media/v4l2-common.h> 41#include <media/v4l2-common.h>
42#include <media/v4l2-ioctl.h> 42#include <media/v4l2-ioctl.h>
43#include <media/wm8775.h>
44 43
45MODULE_DESCRIPTION("v4l2 driver module for cx2388x based TV cards"); 44MODULE_DESCRIPTION("v4l2 driver module for cx2388x based TV cards");
46MODULE_AUTHOR("Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]"); 45MODULE_AUTHOR("Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]");
@@ -977,7 +976,6 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl)
977 const struct cx88_ctrl *c = NULL; 976 const struct cx88_ctrl *c = NULL;
978 u32 value,mask; 977 u32 value,mask;
979 int i; 978 int i;
980 struct v4l2_control client_ctl;
981 979
982 for (i = 0; i < CX8800_CTLS; i++) { 980 for (i = 0; i < CX8800_CTLS; i++) {
983 if (cx8800_ctls[i].v.id == ctl->id) { 981 if (cx8800_ctls[i].v.id == ctl->id) {
@@ -991,27 +989,6 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl)
991 ctl->value = c->v.minimum; 989 ctl->value = c->v.minimum;
992 if (ctl->value > c->v.maximum) 990 if (ctl->value > c->v.maximum)
993 ctl->value = c->v.maximum; 991 ctl->value = c->v.maximum;
994
995 /* Pass changes onto any WM8775 */
996 client_ctl.id = ctl->id;
997 switch (ctl->id) {
998 case V4L2_CID_AUDIO_MUTE:
999 client_ctl.value = ctl->value;
1000 break;
1001 case V4L2_CID_AUDIO_VOLUME:
1002 client_ctl.value = (ctl->value) ?
1003 (0x90 + ctl->value) << 8 : 0;
1004 break;
1005 case V4L2_CID_AUDIO_BALANCE:
1006 client_ctl.value = ctl->value << 9;
1007 break;
1008 default:
1009 client_ctl.id = 0;
1010 break;
1011 }
1012 if (client_ctl.id)
1013 call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
1014
1015 mask=c->mask; 992 mask=c->mask;
1016 switch (ctl->id) { 993 switch (ctl->id) {
1017 case V4L2_CID_AUDIO_BALANCE: 994 case V4L2_CID_AUDIO_BALANCE:
@@ -1558,9 +1535,7 @@ static int radio_queryctrl (struct file *file, void *priv,
1558 if (c->id < V4L2_CID_BASE || 1535 if (c->id < V4L2_CID_BASE ||
1559 c->id >= V4L2_CID_LASTP1) 1536 c->id >= V4L2_CID_LASTP1)
1560 return -EINVAL; 1537 return -EINVAL;
1561 if (c->id == V4L2_CID_AUDIO_MUTE || 1538 if (c->id == V4L2_CID_AUDIO_MUTE) {
1562 c->id == V4L2_CID_AUDIO_VOLUME ||
1563 c->id == V4L2_CID_AUDIO_BALANCE) {
1564 for (i = 0; i < CX8800_CTLS; i++) { 1539 for (i = 0; i < CX8800_CTLS; i++) {
1565 if (cx8800_ctls[i].v.id == c->id) 1540 if (cx8800_ctls[i].v.id == c->id)
1566 break; 1541 break;
diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h
index e8c732e7ae4f..c9981e77416a 100644
--- a/drivers/media/video/cx88/cx88.h
+++ b/drivers/media/video/cx88/cx88.h
@@ -398,19 +398,17 @@ static inline struct cx88_core *to_core(struct v4l2_device *v4l2_dev)
398 return container_of(v4l2_dev, struct cx88_core, v4l2_dev); 398 return container_of(v4l2_dev, struct cx88_core, v4l2_dev);
399} 399}
400 400
401#define call_hw(core, grpid, o, f, args...) \ 401#define call_all(core, o, f, args...) \
402 do { \ 402 do { \
403 if (!core->i2c_rc) { \ 403 if (!core->i2c_rc) { \
404 if (core->gate_ctrl) \ 404 if (core->gate_ctrl) \
405 core->gate_ctrl(core, 1); \ 405 core->gate_ctrl(core, 1); \
406 v4l2_device_call_all(&core->v4l2_dev, grpid, o, f, ##args); \ 406 v4l2_device_call_all(&core->v4l2_dev, 0, o, f, ##args); \
407 if (core->gate_ctrl) \ 407 if (core->gate_ctrl) \
408 core->gate_ctrl(core, 0); \ 408 core->gate_ctrl(core, 0); \
409 } \ 409 } \
410 } while (0) 410 } while (0)
411 411
412#define call_all(core, o, f, args...) call_hw(core, 0, o, f, ##args)
413
414struct cx8800_dev; 412struct cx8800_dev;
415struct cx8802_dev; 413struct cx8802_dev;
416 414
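
With the WM8775 no longer addressed by group id, the call_hw() indirection goes away: call_all() now broadcasts directly to every registered subdevice (group id 0 matches all), toggling the i2c gate around the call. Illustrative uses, assuming the usual cx88 core fields:

/* broadcast a standard change to every subdev on this core */
call_all(core, core, s_std, core->tvnorm);

/* route audio input the same way */
call_all(core, audio, s_routing, input, 0, 0);
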
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 908e3bc88303..2c3007280032 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -2377,7 +2377,7 @@ static const struct v4l2_file_operations radio_fops = {
2377 .owner = THIS_MODULE, 2377 .owner = THIS_MODULE,
2378 .open = em28xx_v4l2_open, 2378 .open = em28xx_v4l2_open,
2379 .release = em28xx_v4l2_close, 2379 .release = em28xx_v4l2_close,
2380 .ioctl = video_ioctl2, 2380 .unlocked_ioctl = video_ioctl2,
2381}; 2381};
2382 2382
2383static const struct v4l2_ioctl_ops radio_ioctl_ops = { 2383static const struct v4l2_ioctl_ops radio_ioctl_ops = {
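
Switching .ioctl to .unlocked_ioctl here (and in the fimc hunk further down) takes the handler off the Big Kernel Lock, so any serialization must now come from the driver itself. A hedged sketch of one common shape, wrapping video_ioctl2 in a per-device mutex; the struct and names are illustrative:

#include <linux/mutex.h>
#include <media/v4l2-dev.h>
#include <media/v4l2-ioctl.h>

struct example_dev {
	struct mutex lock;
};

static long example_unlocked_ioctl(struct file *file, unsigned int cmd,
				   unsigned long arg)
{
	struct example_dev *dev = video_drvdata(file);
	long ret;

	if (mutex_lock_interruptible(&dev->lock))
		return -ERESTARTSYS;
	ret = video_ioctl2(file, cmd, arg);	/* runs without the BKL now */
	mutex_unlock(&dev->lock);
	return ret;
}

static const struct v4l2_file_operations example_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= example_unlocked_ioctl,
};
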
diff --git a/drivers/media/video/mx2_camera.c b/drivers/media/video/mx2_camera.c
index 072bd2d1cfad..13565cba237d 100644
--- a/drivers/media/video/mx2_camera.c
+++ b/drivers/media/video/mx2_camera.c
@@ -807,8 +807,6 @@ static int mx2_camera_set_bus_param(struct soc_camera_device *icd,
807 807
808 if (common_flags & SOCAM_PCLK_SAMPLE_RISING) 808 if (common_flags & SOCAM_PCLK_SAMPLE_RISING)
809 csicr1 |= CSICR1_REDGE; 809 csicr1 |= CSICR1_REDGE;
810 if (common_flags & SOCAM_PCLK_SAMPLE_FALLING)
811 csicr1 |= CSICR1_INV_PCLK;
812 if (common_flags & SOCAM_VSYNC_ACTIVE_HIGH) 810 if (common_flags & SOCAM_VSYNC_ACTIVE_HIGH)
813 csicr1 |= CSICR1_SOF_POL; 811 csicr1 |= CSICR1_SOF_POL;
814 if (common_flags & SOCAM_HSYNC_ACTIVE_HIGH) 812 if (common_flags & SOCAM_HSYNC_ACTIVE_HIGH)
diff --git a/drivers/media/video/s5p-fimc/fimc-capture.c b/drivers/media/video/s5p-fimc/fimc-capture.c
index 1b93207c89e8..2f500809f53d 100644
--- a/drivers/media/video/s5p-fimc/fimc-capture.c
+++ b/drivers/media/video/s5p-fimc/fimc-capture.c
@@ -522,6 +522,7 @@ static int fimc_cap_streamon(struct file *file, void *priv,
522 INIT_LIST_HEAD(&fimc->vid_cap.active_buf_q); 522 INIT_LIST_HEAD(&fimc->vid_cap.active_buf_q);
523 fimc->vid_cap.active_buf_cnt = 0; 523 fimc->vid_cap.active_buf_cnt = 0;
524 fimc->vid_cap.frame_count = 0; 524 fimc->vid_cap.frame_count = 0;
525 fimc->vid_cap.buf_index = fimc_hw_get_frame_index(fimc);
525 526
526 set_bit(ST_CAPT_PEND, &fimc->state); 527 set_bit(ST_CAPT_PEND, &fimc->state);
527 ret = videobuf_streamon(&fimc->vid_cap.vbq); 528 ret = videobuf_streamon(&fimc->vid_cap.vbq);
@@ -652,6 +653,50 @@ static int fimc_cap_s_ctrl(struct file *file, void *priv,
652 return ret; 653 return ret;
653} 654}
654 655
656static int fimc_cap_cropcap(struct file *file, void *fh,
657 struct v4l2_cropcap *cr)
658{
659 struct fimc_frame *f;
660 struct fimc_ctx *ctx = fh;
661 struct fimc_dev *fimc = ctx->fimc_dev;
662
663 if (cr->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
664 return -EINVAL;
665
666 if (mutex_lock_interruptible(&fimc->lock))
667 return -ERESTARTSYS;
668
669 f = &ctx->s_frame;
670 cr->bounds.left = 0;
671 cr->bounds.top = 0;
672 cr->bounds.width = f->o_width;
673 cr->bounds.height = f->o_height;
674 cr->defrect = cr->bounds;
675
676 mutex_unlock(&fimc->lock);
677 return 0;
678}
679
680static int fimc_cap_g_crop(struct file *file, void *fh, struct v4l2_crop *cr)
681{
682 struct fimc_frame *f;
683 struct fimc_ctx *ctx = file->private_data;
684 struct fimc_dev *fimc = ctx->fimc_dev;
685
686
687 if (mutex_lock_interruptible(&fimc->lock))
688 return -ERESTARTSYS;
689
690 f = &ctx->s_frame;
691 cr->c.left = f->offs_h;
692 cr->c.top = f->offs_v;
693 cr->c.width = f->width;
694 cr->c.height = f->height;
695
696 mutex_unlock(&fimc->lock);
697 return 0;
698}
699
655static int fimc_cap_s_crop(struct file *file, void *fh, 700static int fimc_cap_s_crop(struct file *file, void *fh,
656 struct v4l2_crop *cr) 701 struct v4l2_crop *cr)
657{ 702{
@@ -716,9 +761,9 @@ static const struct v4l2_ioctl_ops fimc_capture_ioctl_ops = {
716 .vidioc_g_ctrl = fimc_vidioc_g_ctrl, 761 .vidioc_g_ctrl = fimc_vidioc_g_ctrl,
717 .vidioc_s_ctrl = fimc_cap_s_ctrl, 762 .vidioc_s_ctrl = fimc_cap_s_ctrl,
718 763
719 .vidioc_g_crop = fimc_vidioc_g_crop, 764 .vidioc_g_crop = fimc_cap_g_crop,
720 .vidioc_s_crop = fimc_cap_s_crop, 765 .vidioc_s_crop = fimc_cap_s_crop,
721 .vidioc_cropcap = fimc_vidioc_cropcap, 766 .vidioc_cropcap = fimc_cap_cropcap,
722 767
723 .vidioc_enum_input = fimc_cap_enum_input, 768 .vidioc_enum_input = fimc_cap_enum_input,
724 .vidioc_s_input = fimc_cap_s_input, 769 .vidioc_s_input = fimc_cap_s_input,
@@ -785,7 +830,7 @@ int fimc_register_capture_device(struct fimc_dev *fimc)
785 videobuf_queue_dma_contig_init(&vid_cap->vbq, &fimc_qops, 830 videobuf_queue_dma_contig_init(&vid_cap->vbq, &fimc_qops,
786 vid_cap->v4l2_dev.dev, &fimc->irqlock, 831 vid_cap->v4l2_dev.dev, &fimc->irqlock,
787 V4L2_BUF_TYPE_VIDEO_CAPTURE, V4L2_FIELD_NONE, 832 V4L2_BUF_TYPE_VIDEO_CAPTURE, V4L2_FIELD_NONE,
788 sizeof(struct fimc_vid_buffer), (void *)ctx); 833 sizeof(struct fimc_vid_buffer), (void *)ctx, NULL);
789 834
790 ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1); 835 ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1);
791 if (ret) { 836 if (ret) {
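
The capture path above now answers VIDIOC_CROPCAP itself instead of sharing the m2m helper: both the bounds and the default rectangle report the full source frame, read under the device mutex. A hedged sketch of that shape with the locking elided and the frame size passed in:

#include <linux/videodev2.h>

static int example_cropcap(struct v4l2_cropcap *cr,
			   u32 src_width, u32 src_height)
{
	if (cr->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
		return -EINVAL;

	cr->bounds.left   = 0;
	cr->bounds.top    = 0;
	cr->bounds.width  = src_width;
	cr->bounds.height = src_height;
	cr->defrect = cr->bounds;	/* default crop is the full frame */

	return 0;
}
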
diff --git a/drivers/media/video/s5p-fimc/fimc-core.c b/drivers/media/video/s5p-fimc/fimc-core.c
index 2e7c547894b6..bb99f2d805d3 100644
--- a/drivers/media/video/s5p-fimc/fimc-core.c
+++ b/drivers/media/video/s5p-fimc/fimc-core.c
@@ -50,8 +50,8 @@ static struct fimc_fmt fimc_formats[] = {
50 .planes_cnt = 1, 50 .planes_cnt = 1,
51 .flags = FMT_FLAGS_M2M, 51 .flags = FMT_FLAGS_M2M,
52 }, { 52 }, {
53 .name = "XRGB-8-8-8-8, 24 bpp", 53 .name = "XRGB-8-8-8-8, 32 bpp",
54 .fourcc = V4L2_PIX_FMT_RGB24, 54 .fourcc = V4L2_PIX_FMT_RGB32,
55 .depth = 32, 55 .depth = 32,
56 .color = S5P_FIMC_RGB888, 56 .color = S5P_FIMC_RGB888,
57 .buff_cnt = 1, 57 .buff_cnt = 1,
@@ -983,6 +983,7 @@ int fimc_vidioc_queryctrl(struct file *file, void *priv,
983{ 983{
984 struct fimc_ctx *ctx = priv; 984 struct fimc_ctx *ctx = priv;
985 struct v4l2_queryctrl *c; 985 struct v4l2_queryctrl *c;
986 int ret = -EINVAL;
986 987
987 c = get_ctrl(qc->id); 988 c = get_ctrl(qc->id);
988 if (c) { 989 if (c) {
@@ -990,10 +991,14 @@ int fimc_vidioc_queryctrl(struct file *file, void *priv,
990 return 0; 991 return 0;
991 } 992 }
992 993
993 if (ctx->state & FIMC_CTX_CAP) 994 if (ctx->state & FIMC_CTX_CAP) {
994 return v4l2_subdev_call(ctx->fimc_dev->vid_cap.sd, 995 if (mutex_lock_interruptible(&ctx->fimc_dev->lock))
996 return -ERESTARTSYS;
997 ret = v4l2_subdev_call(ctx->fimc_dev->vid_cap.sd,
995 core, queryctrl, qc); 998 core, queryctrl, qc);
996 return -EINVAL; 999 mutex_unlock(&ctx->fimc_dev->lock);
1000 }
1001 return ret;
997} 1002}
998 1003
999int fimc_vidioc_g_ctrl(struct file *file, void *priv, 1004int fimc_vidioc_g_ctrl(struct file *file, void *priv,
@@ -1115,7 +1120,7 @@ static int fimc_m2m_s_ctrl(struct file *file, void *priv,
1115 return 0; 1120 return 0;
1116} 1121}
1117 1122
1118int fimc_vidioc_cropcap(struct file *file, void *fh, 1123static int fimc_m2m_cropcap(struct file *file, void *fh,
1119 struct v4l2_cropcap *cr) 1124 struct v4l2_cropcap *cr)
1120{ 1125{
1121 struct fimc_frame *frame; 1126 struct fimc_frame *frame;
@@ -1139,7 +1144,7 @@ int fimc_vidioc_cropcap(struct file *file, void *fh,
1139 return 0; 1144 return 0;
1140} 1145}
1141 1146
1142int fimc_vidioc_g_crop(struct file *file, void *fh, struct v4l2_crop *cr) 1147static int fimc_m2m_g_crop(struct file *file, void *fh, struct v4l2_crop *cr)
1143{ 1148{
1144 struct fimc_frame *frame; 1149 struct fimc_frame *frame;
1145 struct fimc_ctx *ctx = file->private_data; 1150 struct fimc_ctx *ctx = file->private_data;
@@ -1167,22 +1172,22 @@ int fimc_try_crop(struct fimc_ctx *ctx, struct v4l2_crop *cr)
1167 struct fimc_frame *f; 1172 struct fimc_frame *f;
1168 u32 min_size, halign; 1173 u32 min_size, halign;
1169 1174
1170 f = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ?
1171 &ctx->s_frame : &ctx->d_frame;
1172
1173 if (cr->c.top < 0 || cr->c.left < 0) { 1175 if (cr->c.top < 0 || cr->c.left < 0) {
1174 v4l2_err(&fimc->m2m.v4l2_dev, 1176 v4l2_err(&fimc->m2m.v4l2_dev,
1175 "doesn't support negative values for top & left\n"); 1177 "doesn't support negative values for top & left\n");
1176 return -EINVAL; 1178 return -EINVAL;
1177 } 1179 }
1178 1180
1179 f = ctx_get_frame(ctx, cr->type); 1181 if (cr->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1180 if (IS_ERR(f)) 1182 f = (ctx->state & FIMC_CTX_CAP) ? &ctx->s_frame : &ctx->d_frame;
1181 return PTR_ERR(f); 1183 else if (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
1184 ctx->state & FIMC_CTX_M2M)
1185 f = &ctx->s_frame;
1186 else
1187 return -EINVAL;
1182 1188
1183 min_size = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) 1189 min_size = (f == &ctx->s_frame) ?
1184 ? fimc->variant->min_inp_pixsize 1190 fimc->variant->min_inp_pixsize : fimc->variant->min_out_pixsize;
1185 : fimc->variant->min_out_pixsize;
1186 1191
1187 if (ctx->state & FIMC_CTX_M2M) { 1192 if (ctx->state & FIMC_CTX_M2M) {
1188 if (fimc->id == 1 && fimc->variant->pix_hoff) 1193 if (fimc->id == 1 && fimc->variant->pix_hoff)
@@ -1233,6 +1238,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
1233 f = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ? 1238 f = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ?
1234 &ctx->s_frame : &ctx->d_frame; 1239 &ctx->s_frame : &ctx->d_frame;
1235 1240
1241 if (mutex_lock_interruptible(&fimc->lock))
1242 return -ERESTARTSYS;
1243
1236 spin_lock_irqsave(&ctx->slock, flags); 1244 spin_lock_irqsave(&ctx->slock, flags);
1237 if (~ctx->state & (FIMC_SRC_FMT | FIMC_DST_FMT)) { 1245 if (~ctx->state & (FIMC_SRC_FMT | FIMC_DST_FMT)) {
1238 /* Check to see if scaling ratio is within supported range */ 1246 /* Check to see if scaling ratio is within supported range */
@@ -1241,9 +1249,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
1241 else 1249 else
1242 ret = fimc_check_scaler_ratio(&cr->c, &ctx->s_frame); 1250 ret = fimc_check_scaler_ratio(&cr->c, &ctx->s_frame);
1243 if (ret) { 1251 if (ret) {
1244 spin_unlock_irqrestore(&ctx->slock, flags);
1245 v4l2_err(&fimc->m2m.v4l2_dev, "Out of scaler range"); 1252 v4l2_err(&fimc->m2m.v4l2_dev, "Out of scaler range");
1246 return -EINVAL; 1253 ret = -EINVAL;
1254 goto scr_unlock;
1247 } 1255 }
1248 } 1256 }
1249 ctx->state |= FIMC_PARAMS; 1257 ctx->state |= FIMC_PARAMS;
@@ -1253,7 +1261,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
1253 f->width = cr->c.width; 1261 f->width = cr->c.width;
1254 f->height = cr->c.height; 1262 f->height = cr->c.height;
1255 1263
1264scr_unlock:
1256 spin_unlock_irqrestore(&ctx->slock, flags); 1265 spin_unlock_irqrestore(&ctx->slock, flags);
1266 mutex_unlock(&fimc->lock);
1257 return 0; 1267 return 0;
1258} 1268}
1259 1269
@@ -1285,9 +1295,9 @@ static const struct v4l2_ioctl_ops fimc_m2m_ioctl_ops = {
1285 .vidioc_g_ctrl = fimc_vidioc_g_ctrl, 1295 .vidioc_g_ctrl = fimc_vidioc_g_ctrl,
1286 .vidioc_s_ctrl = fimc_m2m_s_ctrl, 1296 .vidioc_s_ctrl = fimc_m2m_s_ctrl,
1287 1297
1288 .vidioc_g_crop = fimc_vidioc_g_crop, 1298 .vidioc_g_crop = fimc_m2m_g_crop,
1289 .vidioc_s_crop = fimc_m2m_s_crop, 1299 .vidioc_s_crop = fimc_m2m_s_crop,
1290 .vidioc_cropcap = fimc_vidioc_cropcap 1300 .vidioc_cropcap = fimc_m2m_cropcap
1291 1301
1292}; 1302};
1293 1303
@@ -1396,7 +1406,7 @@ static const struct v4l2_file_operations fimc_m2m_fops = {
1396 .open = fimc_m2m_open, 1406 .open = fimc_m2m_open,
1397 .release = fimc_m2m_release, 1407 .release = fimc_m2m_release,
1398 .poll = fimc_m2m_poll, 1408 .poll = fimc_m2m_poll,
1399 .ioctl = video_ioctl2, 1409 .unlocked_ioctl = video_ioctl2,
1400 .mmap = fimc_m2m_mmap, 1410 .mmap = fimc_m2m_mmap,
1401}; 1411};
1402 1412
@@ -1736,6 +1746,7 @@ static struct samsung_fimc_variant fimc0_variant_s5pv310 = {
1736 .pix_hoff = 1, 1746 .pix_hoff = 1,
1737 .has_inp_rot = 1, 1747 .has_inp_rot = 1,
1738 .has_out_rot = 1, 1748 .has_out_rot = 1,
1749 .has_cistatus2 = 1,
1739 .min_inp_pixsize = 16, 1750 .min_inp_pixsize = 16,
1740 .min_out_pixsize = 16, 1751 .min_out_pixsize = 16,
1741 .hor_offs_align = 1, 1752 .hor_offs_align = 1,
@@ -1745,6 +1756,7 @@ static struct samsung_fimc_variant fimc0_variant_s5pv310 = {
1745 1756
1746static struct samsung_fimc_variant fimc2_variant_s5pv310 = { 1757static struct samsung_fimc_variant fimc2_variant_s5pv310 = {
1747 .pix_hoff = 1, 1758 .pix_hoff = 1,
1759 .has_cistatus2 = 1,
1748 .min_inp_pixsize = 16, 1760 .min_inp_pixsize = 16,
1749 .min_out_pixsize = 16, 1761 .min_out_pixsize = 16,
1750 .hor_offs_align = 1, 1762 .hor_offs_align = 1,
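
The fimc-core.c changes above go together: switching the m2m node from .ioctl to .unlocked_ioctl drops the v4l2 core's BKL-based serialization, so each entry point that touches shared device state now has to take the driver's own mutex. A minimal sketch of the resulting shape (the function name is hypothetical; fimc_dev->lock is the mutex used in the hunks above):

    static int fimc_do_locked(struct fimc_ctx *ctx)
    {
            if (mutex_lock_interruptible(&ctx->fimc_dev->lock))
                    return -ERESTARTSYS;    /* signal arrived; let the syscall restart */
            /* ... touch hardware or shared state ... */
            mutex_unlock(&ctx->fimc_dev->lock);
            return 0;
    }

Returning -ERESTARTSYS instead of blocking uninterruptibly keeps a wedged device from hanging the calling task.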
diff --git a/drivers/media/video/s5p-fimc/fimc-core.h b/drivers/media/video/s5p-fimc/fimc-core.h
index 3e1078516560..4f047d35f8ad 100644
--- a/drivers/media/video/s5p-fimc/fimc-core.h
+++ b/drivers/media/video/s5p-fimc/fimc-core.h
@@ -13,13 +13,15 @@
13 13
14/*#define DEBUG*/ 14/*#define DEBUG*/
15 15
16#include <linux/sched.h>
16#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/videodev2.h>
17#include <media/videobuf-core.h> 19#include <media/videobuf-core.h>
18#include <media/v4l2-device.h> 20#include <media/v4l2-device.h>
19#include <media/v4l2-mem2mem.h> 21#include <media/v4l2-mem2mem.h>
20#include <media/v4l2-mediabus.h> 22#include <media/v4l2-mediabus.h>
21#include <media/s3c_fimc.h> 23#include <media/s3c_fimc.h>
22#include <linux/videodev2.h> 24
23#include "regs-fimc.h" 25#include "regs-fimc.h"
24 26
25#define err(fmt, args...) \ 27#define err(fmt, args...) \
@@ -369,6 +371,7 @@ struct fimc_pix_limit {
369 * @pix_hoff: indicate whether horizontal offset is in pixels or in bytes 371 * @pix_hoff: indicate whether horizontal offset is in pixels or in bytes
370 * @has_inp_rot: set if has input rotator 372 * @has_inp_rot: set if has input rotator
371 * @has_out_rot: set if has output rotator 373 * @has_out_rot: set if has output rotator
374 * @has_cistatus2: 1 if CISTATUS2 register is present in this IP revision
372 * @pix_limit: pixel size constraints for the scaler 375 * @pix_limit: pixel size constraints for the scaler
373 * @min_inp_pixsize: minimum input pixel size 376 * @min_inp_pixsize: minimum input pixel size
374 * @min_out_pixsize: minimum output pixel size 377 * @min_out_pixsize: minimum output pixel size
@@ -379,6 +382,7 @@ struct samsung_fimc_variant {
379 unsigned int pix_hoff:1; 382 unsigned int pix_hoff:1;
380 unsigned int has_inp_rot:1; 383 unsigned int has_inp_rot:1;
381 unsigned int has_out_rot:1; 384 unsigned int has_out_rot:1;
385 unsigned int has_cistatus2:1;
382 struct fimc_pix_limit *pix_limit; 386 struct fimc_pix_limit *pix_limit;
383 u16 min_inp_pixsize; 387 u16 min_inp_pixsize;
384 u16 min_out_pixsize; 388 u16 min_out_pixsize;
@@ -554,11 +558,19 @@ static inline struct fimc_frame *ctx_get_frame(struct fimc_ctx *ctx,
554 return frame; 558 return frame;
555} 559}
556 560
561/* Return an index to the buffer actually being written. */
557static inline u32 fimc_hw_get_frame_index(struct fimc_dev *dev) 562static inline u32 fimc_hw_get_frame_index(struct fimc_dev *dev)
558{ 563{
559 u32 reg = readl(dev->regs + S5P_CISTATUS); 564 u32 reg;
560 return (reg & S5P_CISTATUS_FRAMECNT_MASK) >> 565
561 S5P_CISTATUS_FRAMECNT_SHIFT; 566 if (dev->variant->has_cistatus2) {
567 reg = readl(dev->regs + S5P_CISTATUS2) & 0x3F;
568 return reg > 0 ? --reg : reg;
569 } else {
570 reg = readl(dev->regs + S5P_CISTATUS);
571 return (reg & S5P_CISTATUS_FRAMECNT_MASK) >>
572 S5P_CISTATUS_FRAMECNT_SHIFT;
573 }
562} 574}
563 575
564/* -----------------------------------------------------*/ 576/* -----------------------------------------------------*/
@@ -594,10 +606,6 @@ int fimc_vidioc_g_fmt(struct file *file, void *priv,
594 struct v4l2_format *f); 606 struct v4l2_format *f);
595int fimc_vidioc_try_fmt(struct file *file, void *priv, 607int fimc_vidioc_try_fmt(struct file *file, void *priv,
596 struct v4l2_format *f); 608 struct v4l2_format *f);
597int fimc_vidioc_g_crop(struct file *file, void *fh,
598 struct v4l2_crop *cr);
599int fimc_vidioc_cropcap(struct file *file, void *fh,
600 struct v4l2_cropcap *cr);
601int fimc_vidioc_queryctrl(struct file *file, void *priv, 609int fimc_vidioc_queryctrl(struct file *file, void *priv,
602 struct v4l2_queryctrl *qc); 610 struct v4l2_queryctrl *qc);
603int fimc_vidioc_g_ctrl(struct file *file, void *priv, 611int fimc_vidioc_g_ctrl(struct file *file, void *priv,
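
In fimc_hw_get_frame_index() above, the CISTATUS2 path reads the low six bits and returns reg - 1 when the value is nonzero; this assumes the register reports a 1-based index of the buffer currently being written, with 0 meaning no frame yet. An equivalent, slightly more direct form of that branch (same assumption):

    u32 idx = readl(dev->regs + S5P_CISTATUS2) & 0x3f;
    return idx ? idx - 1 : 0;

Older IP revisions without CISTATUS2 keep deriving the index from the CISTATUS frame counter field, which is why the capability is a per-variant has_cistatus2 flag rather than a compile-time choice.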
diff --git a/drivers/media/video/s5p-fimc/regs-fimc.h b/drivers/media/video/s5p-fimc/regs-fimc.h
index a57daedb5b5c..57e33f84fcfa 100644
--- a/drivers/media/video/s5p-fimc/regs-fimc.h
+++ b/drivers/media/video/s5p-fimc/regs-fimc.h
@@ -165,6 +165,9 @@
165#define S5P_CISTATUS_VVALID_A (1 << 15) 165#define S5P_CISTATUS_VVALID_A (1 << 15)
166#define S5P_CISTATUS_VVALID_B (1 << 14) 166#define S5P_CISTATUS_VVALID_B (1 << 14)
167 167
168/* Indexes to the last and the currently processed buffer. */
169#define S5P_CISTATUS2 0x68
170
168/* Image capture control */ 171/* Image capture control */
169#define S5P_CIIMGCPT 0xc0 172#define S5P_CIIMGCPT 0xc0
170#define S5P_CIIMGCPT_IMGCPTEN (1 << 31) 173#define S5P_CIIMGCPT_IMGCPTEN (1 << 31)
diff --git a/drivers/media/video/sh_mobile_ceu_camera.c b/drivers/media/video/sh_mobile_ceu_camera.c
index 5c209afb0ac8..2486520582f2 100644
--- a/drivers/media/video/sh_mobile_ceu_camera.c
+++ b/drivers/media/video/sh_mobile_ceu_camera.c
@@ -1980,7 +1980,7 @@ static int __devinit sh_mobile_ceu_probe(struct platform_device *pdev)
1980 * we complete the completion. 1980 * we complete the completion.
1981 */ 1981 */
1982 1982
1983 if (!csi2->driver || !csi2->driver->owner) { 1983 if (!csi2->driver) {
1984 complete(&wait.completion); 1984 complete(&wait.completion);
1985 /* Either too late, or probing failed */ 1985 /* Either too late, or probing failed */
1986 bus_unregister_notifier(&platform_bus_type, &wait.notifier); 1986 bus_unregister_notifier(&platform_bus_type, &wait.notifier);
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 335120c2021b..052bd6dfa5a7 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -405,13 +405,13 @@ static int soc_camera_open(struct file *file)
405 ret = soc_camera_set_fmt(icd, &f); 405 ret = soc_camera_set_fmt(icd, &f);
406 if (ret < 0) 406 if (ret < 0)
407 goto esfmt; 407 goto esfmt;
408
409 ici->ops->init_videobuf(&icd->vb_vidq, icd);
408 } 410 }
409 411
410 file->private_data = icd; 412 file->private_data = icd;
411 dev_dbg(&icd->dev, "camera device open\n"); 413 dev_dbg(&icd->dev, "camera device open\n");
412 414
413 ici->ops->init_videobuf(&icd->vb_vidq, icd);
414
415 mutex_unlock(&icd->video_lock); 415 mutex_unlock(&icd->video_lock);
416 416
417 return 0; 417 return 0;
diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c
index 135525649086..fe8ef6419f83 100644
--- a/drivers/media/video/wm8775.c
+++ b/drivers/media/video/wm8775.c
@@ -35,7 +35,6 @@
35#include <media/v4l2-device.h> 35#include <media/v4l2-device.h>
36#include <media/v4l2-chip-ident.h> 36#include <media/v4l2-chip-ident.h>
37#include <media/v4l2-ctrls.h> 37#include <media/v4l2-ctrls.h>
38#include <media/wm8775.h>
39 38
40MODULE_DESCRIPTION("wm8775 driver"); 39MODULE_DESCRIPTION("wm8775 driver");
41MODULE_AUTHOR("Ulf Eklund, Hans Verkuil"); 40MODULE_AUTHOR("Ulf Eklund, Hans Verkuil");
@@ -51,16 +50,10 @@ enum {
51 TOT_REGS 50 TOT_REGS
52}; 51};
53 52
54#define ALC_HOLD 0x85 /* R17: use zero cross detection, ALC hold time 42.6 ms */
55#define ALC_EN 0x100 /* R17: ALC enable */
56
57struct wm8775_state { 53struct wm8775_state {
58 struct v4l2_subdev sd; 54 struct v4l2_subdev sd;
59 struct v4l2_ctrl_handler hdl; 55 struct v4l2_ctrl_handler hdl;
60 struct v4l2_ctrl *mute; 56 struct v4l2_ctrl *mute;
61 struct v4l2_ctrl *vol;
62 struct v4l2_ctrl *bal;
63 struct v4l2_ctrl *loud;
64 u8 input; /* Last selected input (0-0xf) */ 57 u8 input; /* Last selected input (0-0xf) */
65}; 58};
66 59
@@ -92,30 +85,6 @@ static int wm8775_write(struct v4l2_subdev *sd, int reg, u16 val)
92 return -1; 85 return -1;
93} 86}
94 87
95static void wm8775_set_audio(struct v4l2_subdev *sd, int quietly)
96{
97 struct wm8775_state *state = to_state(sd);
98 u8 vol_l, vol_r;
99 int muted = 0 != state->mute->val;
100 u16 volume = (u16)state->vol->val;
101 u16 balance = (u16)state->bal->val;
102
103 /* normalize ( 65535 to 0 -> 255 to 0 (+24dB to -103dB) ) */
104 vol_l = (min(65536 - balance, 32768) * volume) >> 23;
105 vol_r = (min(balance, (u16)32768) * volume) >> 23;
106
107 /* Mute */
108 if (muted || quietly)
109 wm8775_write(sd, R21, 0x0c0 | state->input);
110
111 wm8775_write(sd, R14, vol_l | 0x100); /* 0x100= Left channel ADC zero cross enable */
112 wm8775_write(sd, R15, vol_r | 0x100); /* 0x100= Right channel ADC zero cross enable */
113
114 /* Un-mute */
115 if (!muted)
116 wm8775_write(sd, R21, state->input);
117}
118
119static int wm8775_s_routing(struct v4l2_subdev *sd, 88static int wm8775_s_routing(struct v4l2_subdev *sd,
120 u32 input, u32 output, u32 config) 89 u32 input, u32 output, u32 config)
121{ 90{
@@ -133,26 +102,25 @@ static int wm8775_s_routing(struct v4l2_subdev *sd,
133 state->input = input; 102 state->input = input;
134 if (!v4l2_ctrl_g_ctrl(state->mute)) 103 if (!v4l2_ctrl_g_ctrl(state->mute))
135 return 0; 104 return 0;
136 if (!v4l2_ctrl_g_ctrl(state->vol)) 105 wm8775_write(sd, R21, 0x0c0);
137 return 0; 106 wm8775_write(sd, R14, 0x1d4);
138 if (!v4l2_ctrl_g_ctrl(state->bal)) 107 wm8775_write(sd, R15, 0x1d4);
139 return 0; 108 wm8775_write(sd, R21, 0x100 + state->input);
140 wm8775_set_audio(sd, 1);
141 return 0; 109 return 0;
142} 110}
143 111
144static int wm8775_s_ctrl(struct v4l2_ctrl *ctrl) 112static int wm8775_s_ctrl(struct v4l2_ctrl *ctrl)
145{ 113{
146 struct v4l2_subdev *sd = to_sd(ctrl); 114 struct v4l2_subdev *sd = to_sd(ctrl);
115 struct wm8775_state *state = to_state(sd);
147 116
148 switch (ctrl->id) { 117 switch (ctrl->id) {
149 case V4L2_CID_AUDIO_MUTE: 118 case V4L2_CID_AUDIO_MUTE:
150 case V4L2_CID_AUDIO_VOLUME: 119 wm8775_write(sd, R21, 0x0c0);
151 case V4L2_CID_AUDIO_BALANCE: 120 wm8775_write(sd, R14, 0x1d4);
152 wm8775_set_audio(sd, 0); 121 wm8775_write(sd, R15, 0x1d4);
153 return 0; 122 if (!ctrl->val)
154 case V4L2_CID_AUDIO_LOUDNESS: 123 wm8775_write(sd, R21, 0x100 + state->input);
155 wm8775_write(sd, R17, (ctrl->val ? ALC_EN : 0) | ALC_HOLD);
156 return 0; 124 return 0;
157 } 125 }
158 return -EINVAL; 126 return -EINVAL;
@@ -176,7 +144,16 @@ static int wm8775_log_status(struct v4l2_subdev *sd)
176 144
177static int wm8775_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *freq) 145static int wm8775_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *freq)
178{ 146{
179 wm8775_set_audio(sd, 0); 147 struct wm8775_state *state = to_state(sd);
148
149 /* If I remove this, then it can happen that I have no
150 sound the first time I tune from static to a valid channel.
151 It's difficult to reproduce and is almost certainly related
152 to the zero cross detect circuit. */
153 wm8775_write(sd, R21, 0x0c0);
154 wm8775_write(sd, R14, 0x1d4);
155 wm8775_write(sd, R15, 0x1d4);
156 wm8775_write(sd, R21, 0x100 + state->input);
180 return 0; 157 return 0;
181} 158}
182 159
@@ -226,7 +203,6 @@ static int wm8775_probe(struct i2c_client *client,
226{ 203{
227 struct wm8775_state *state; 204 struct wm8775_state *state;
228 struct v4l2_subdev *sd; 205 struct v4l2_subdev *sd;
229 int err;
230 206
231 /* Check if the adapter supports the needed features */ 207 /* Check if the adapter supports the needed features */
232 if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) 208 if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
@@ -240,21 +216,15 @@ static int wm8775_probe(struct i2c_client *client,
240 return -ENOMEM; 216 return -ENOMEM;
241 sd = &state->sd; 217 sd = &state->sd;
242 v4l2_i2c_subdev_init(sd, client, &wm8775_ops); 218 v4l2_i2c_subdev_init(sd, client, &wm8775_ops);
243 sd->grp_id = WM8775_GID; /* subdev group id */
244 state->input = 2; 219 state->input = 2;
245 220
246 v4l2_ctrl_handler_init(&state->hdl, 4); 221 v4l2_ctrl_handler_init(&state->hdl, 1);
247 state->mute = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops, 222 state->mute = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
248 V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0); 223 V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0);
249 state->vol = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
250 V4L2_CID_AUDIO_VOLUME, 0, 65535, (65535+99)/100, 0xCF00); /* 0dB*/
251 state->bal = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
252 V4L2_CID_AUDIO_BALANCE, 0, 65535, (65535+99)/100, 32768);
253 state->loud = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
254 V4L2_CID_AUDIO_LOUDNESS, 0, 1, 1, 1);
255 sd->ctrl_handler = &state->hdl; 224 sd->ctrl_handler = &state->hdl;
256 err = state->hdl.error; 225 if (state->hdl.error) {
257 if (err) { 226 int err = state->hdl.error;
227
258 v4l2_ctrl_handler_free(&state->hdl); 228 v4l2_ctrl_handler_free(&state->hdl);
259 kfree(state); 229 kfree(state);
260 return err; 230 return err;
@@ -266,25 +236,29 @@ static int wm8775_probe(struct i2c_client *client,
266 wm8775_write(sd, R23, 0x000); 236 wm8775_write(sd, R23, 0x000);
267 /* Disable zero cross detect timeout */ 237 /* Disable zero cross detect timeout */
268 wm8775_write(sd, R7, 0x000); 238 wm8775_write(sd, R7, 0x000);
269 /* HPF enable, I2S mode, 24-bit */ 239 /* Left justified, 24-bit mode */
270 wm8775_write(sd, R11, 0x022); 240 wm8775_write(sd, R11, 0x021);
271 /* Master mode, clock ratio 256fs */ 241 /* Master mode, clock ratio 256fs */
272 wm8775_write(sd, R12, 0x102); 242 wm8775_write(sd, R12, 0x102);
273 /* Powered up */ 243 /* Powered up */
274 wm8775_write(sd, R13, 0x000); 244 wm8775_write(sd, R13, 0x000);
275 /* ALC stereo, ALC target level -5dB FS, ALC max gain +8dB */ 245 /* ADC gain +2.5dB, enable zero cross */
276 wm8775_write(sd, R16, 0x1bb); 246 wm8775_write(sd, R14, 0x1d4);
277 /* Set ALC mode and hold time */ 247 /* ADC gain +2.5dB, enable zero cross */
278 wm8775_write(sd, R17, (state->loud->val ? ALC_EN : 0) | ALC_HOLD); 248 wm8775_write(sd, R15, 0x1d4);
249 /* ALC Stereo, ALC target level -1dB FS max gain +8dB */
250 wm8775_write(sd, R16, 0x1bf);
251 /* Enable gain control, use zero cross detection,
252 ALC hold time 42.6 ms */
253 wm8775_write(sd, R17, 0x185);
279 /* ALC gain ramp up delay 34 s, ALC gain ramp down delay 33 ms */ 254 /* ALC gain ramp up delay 34 s, ALC gain ramp down delay 33 ms */
280 wm8775_write(sd, R18, 0x0a2); 255 wm8775_write(sd, R18, 0x0a2);
281 /* Enable noise gate, threshold -72dBfs */ 256 /* Enable noise gate, threshold -72dBfs */
282 wm8775_write(sd, R19, 0x005); 257 wm8775_write(sd, R19, 0x005);
283 /* Transient window 4ms, ALC min gain -5dB */ 258 /* Transient window 4ms, lower PGA gain limit -1dB */
284 wm8775_write(sd, R20, 0x0fb); 259 wm8775_write(sd, R20, 0x07a);
285 260 /* LRBOTH = 1, use input 2. */
286 wm8775_set_audio(sd, 1); /* set volume/mute/mux */ 261 wm8775_write(sd, R21, 0x102);
287
288 return 0; 262 return 0;
289} 263}
290 264
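
The wm8775 rework drops the volume, balance and loudness controls and instead repeats one fixed register sequence wherever the input or mute state changes: mute everything, program both ADC gain registers, then unmute with the selected input. As a hypothetical helper (values copied from the writes above; the driver deliberately open-codes this sequence instead):

    static void wm8775_reprogram(struct v4l2_subdev *sd, u8 input)
    {
            wm8775_write(sd, R21, 0x0c0);           /* mute all inputs */
            wm8775_write(sd, R14, 0x1d4);           /* left ADC +2.5dB, zero cross */
            wm8775_write(sd, R15, 0x1d4);           /* right ADC +2.5dB, zero cross */
            wm8775_write(sd, R21, 0x100 + input);   /* unmute, select input */
    }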
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index dbe1c93c1af3..d9640a623ff4 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -303,7 +303,7 @@ static irqreturn_t ab8500_irq(int irq, void *dev)
303 continue; 303 continue;
304 304
305 do { 305 do {
306 int bit = __ffs(status); 306 int bit = __ffs(value);
307 int line = i * 8 + bit; 307 int line = i * 8 + bit;
308 308
309 handle_nested_irq(ab8500->irq_base + line); 309 handle_nested_irq(ab8500->irq_base + line);
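
The ab8500 fix is a one-word bug worth spelling out: the loop walks the latched-and-masked bits in 'value' but was calling __ffs() on the raw 'status' word, so it could dispatch lines that were masked off, or spin on a bit it never clears. The intended loop shape, as a sketch (the bit-clearing step is assumed from context, it is not visible in the hunk):

    do {
            int bit = __ffs(value);         /* lowest pending bit */
            handle_nested_irq(ab8500->irq_base + i * 8 + bit);
            value &= ~(1U << bit);          /* consume it, look for the next */
    } while (value);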
diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 7d2563fc15c6..76cadcf3b1fe 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -1455,7 +1455,11 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
1455 dev_err(wm831x->dev, "Failed to read parent ID: %d\n", ret); 1455 dev_err(wm831x->dev, "Failed to read parent ID: %d\n", ret);
1456 goto err; 1456 goto err;
1457 } 1457 }
1458 if (ret != 0x6204) { 1458 switch (ret) {
1459 case 0x6204:
1460 case 0x6246:
1461 break;
1462 default:
1459 dev_err(wm831x->dev, "Device is not a WM831x: ID %x\n", ret); 1463 dev_err(wm831x->dev, "Device is not a WM831x: ID %x\n", ret);
1460 ret = -EINVAL; 1464 ret = -EINVAL;
1461 goto err; 1465 goto err;
@@ -1620,7 +1624,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
1620 case WM8325: 1624 case WM8325:
1621 ret = mfd_add_devices(wm831x->dev, -1, 1625 ret = mfd_add_devices(wm831x->dev, -1,
1622 wm8320_devs, ARRAY_SIZE(wm8320_devs), 1626 wm8320_devs, ARRAY_SIZE(wm8320_devs),
1623 NULL, 0); 1627 NULL, wm831x->irq_base);
1624 break; 1628 break;
1625 1629
1626 default: 1630 default:
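
Two independent wm831x fixes here: the parent-ID check becomes a switch so that 0x6246 is accepted alongside 0x6204, and mfd_add_devices() for the WM8320/WM8325 cells is given the chip's irq_base instead of 0. The MFD core offsets each cell's IRQ resources by that base, so passing 0 left the child devices claiming the wrong interrupt numbers; the call as fixed:

    ret = mfd_add_devices(wm831x->dev, -1,
                          wm8320_devs, ARRAY_SIZE(wm8320_devs),
                          NULL, wm831x->irq_base);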
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 31ae07a36576..57dcf8fa774a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1773,6 +1773,7 @@ int mmc_pm_notify(struct notifier_block *notify_block,
1773 1773
1774 case PM_POST_SUSPEND: 1774 case PM_POST_SUSPEND:
1775 case PM_POST_HIBERNATION: 1775 case PM_POST_HIBERNATION:
1776 case PM_POST_RESTORE:
1776 1777
1777 spin_lock_irqsave(&host->lock, flags); 1778 spin_lock_irqsave(&host->lock, flags);
1778 host->rescan_disable = 0; 1779 host->rescan_disable = 0;
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index 591ab540b407..d3e6a962f423 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -69,6 +69,7 @@
69#include <linux/highmem.h> 69#include <linux/highmem.h>
70 70
71#include <linux/mmc/host.h> 71#include <linux/mmc/host.h>
72#include <linux/mmc/sdio.h>
72 73
73#include <asm/io.h> 74#include <asm/io.h>
74#include <asm/irq.h> 75#include <asm/irq.h>
@@ -493,10 +494,14 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command
493 else if (data->flags & MMC_DATA_WRITE) 494 else if (data->flags & MMC_DATA_WRITE)
494 cmdr |= AT91_MCI_TRCMD_START; 495 cmdr |= AT91_MCI_TRCMD_START;
495 496
496 if (data->flags & MMC_DATA_STREAM) 497 if (cmd->opcode == SD_IO_RW_EXTENDED) {
497 cmdr |= AT91_MCI_TRTYP_STREAM; 498 cmdr |= AT91_MCI_TRTYP_SDIO_BLOCK;
498 if (data->blocks > 1) 499 } else {
499 cmdr |= AT91_MCI_TRTYP_MULTIPLE; 500 if (data->flags & MMC_DATA_STREAM)
501 cmdr |= AT91_MCI_TRTYP_STREAM;
502 if (data->blocks > 1)
503 cmdr |= AT91_MCI_TRTYP_MULTIPLE;
504 }
500 } 505 }
501 else { 506 else {
502 block_length = 0; 507 block_length = 0;
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 301351a5d838..ad2a7a032cdf 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -26,6 +26,7 @@
26#include <linux/stat.h> 26#include <linux/stat.h>
27 27
28#include <linux/mmc/host.h> 28#include <linux/mmc/host.h>
29#include <linux/mmc/sdio.h>
29 30
30#include <mach/atmel-mci.h> 31#include <mach/atmel-mci.h>
31#include <linux/atmel-mci.h> 32#include <linux/atmel-mci.h>
@@ -532,12 +533,17 @@ static u32 atmci_prepare_command(struct mmc_host *mmc,
532 data = cmd->data; 533 data = cmd->data;
533 if (data) { 534 if (data) {
534 cmdr |= MCI_CMDR_START_XFER; 535 cmdr |= MCI_CMDR_START_XFER;
535 if (data->flags & MMC_DATA_STREAM) 536
536 cmdr |= MCI_CMDR_STREAM; 537 if (cmd->opcode == SD_IO_RW_EXTENDED) {
537 else if (data->blocks > 1) 538 cmdr |= MCI_CMDR_SDIO_BLOCK;
538 cmdr |= MCI_CMDR_MULTI_BLOCK; 539 } else {
539 else 540 if (data->flags & MMC_DATA_STREAM)
540 cmdr |= MCI_CMDR_BLOCK; 541 cmdr |= MCI_CMDR_STREAM;
542 else if (data->blocks > 1)
543 cmdr |= MCI_CMDR_MULTI_BLOCK;
544 else
545 cmdr |= MCI_CMDR_BLOCK;
546 }
541 547
542 if (data->flags & MMC_DATA_READ) 548 if (data->flags & MMC_DATA_READ)
543 cmdr |= MCI_CMDR_TRDIR_READ; 549 cmdr |= MCI_CMDR_TRDIR_READ;
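
at91_mci and atmel-mci receive the same logical fix: an SDIO CMD53 (SD_IO_RW_EXTENDED) data transfer must be tagged with the controller's SDIO block transfer type, not the MMC stream/multi-block types the generic flags would select. The shared shape, following at91_mci (the constant name differs per controller, AT91_MCI_TRTYP_SDIO_BLOCK vs MCI_CMDR_SDIO_BLOCK, and atmel-mci additionally falls back to MCI_CMDR_BLOCK for single-block transfers):

    if (cmd->opcode == SD_IO_RW_EXTENDED) {
            cmdr |= TRTYP_SDIO_BLOCK;       /* controller-specific constant */
    } else {
            if (data->flags & MMC_DATA_STREAM)
                    cmdr |= TRTYP_STREAM;
            if (data->blocks > 1)
                    cmdr |= TRTYP_MULTIPLE;
    }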
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index 09b099bfab2b..bdf11d89a499 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -702,6 +702,7 @@ static int __devinit atl1c_sw_init(struct atl1c_adapter *adapter)
702 702
703 703
704 adapter->wol = 0; 704 adapter->wol = 0;
705 device_set_wakeup_enable(&pdev->dev, false);
705 adapter->link_speed = SPEED_0; 706 adapter->link_speed = SPEED_0;
706 adapter->link_duplex = FULL_DUPLEX; 707 adapter->link_duplex = FULL_DUPLEX;
707 adapter->num_rx_queues = AT_DEF_RECEIVE_QUEUE; 708 adapter->num_rx_queues = AT_DEF_RECEIVE_QUEUE;
@@ -2444,8 +2445,9 @@ static int atl1c_close(struct net_device *netdev)
2444 return 0; 2445 return 0;
2445} 2446}
2446 2447
2447static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state) 2448static int atl1c_suspend(struct device *dev)
2448{ 2449{
2450 struct pci_dev *pdev = to_pci_dev(dev);
2449 struct net_device *netdev = pci_get_drvdata(pdev); 2451 struct net_device *netdev = pci_get_drvdata(pdev);
2450 struct atl1c_adapter *adapter = netdev_priv(netdev); 2452 struct atl1c_adapter *adapter = netdev_priv(netdev);
2451 struct atl1c_hw *hw = &adapter->hw; 2453 struct atl1c_hw *hw = &adapter->hw;
@@ -2454,7 +2456,6 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
2454 u32 wol_ctrl_data = 0; 2456 u32 wol_ctrl_data = 0;
2455 u16 mii_intr_status_data = 0; 2457 u16 mii_intr_status_data = 0;
2456 u32 wufc = adapter->wol; 2458 u32 wufc = adapter->wol;
2457 int retval = 0;
2458 2459
2459 atl1c_disable_l0s_l1(hw); 2460 atl1c_disable_l0s_l1(hw);
2460 if (netif_running(netdev)) { 2461 if (netif_running(netdev)) {
@@ -2462,9 +2463,6 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
2462 atl1c_down(adapter); 2463 atl1c_down(adapter);
2463 } 2464 }
2464 netif_device_detach(netdev); 2465 netif_device_detach(netdev);
2465 retval = pci_save_state(pdev);
2466 if (retval)
2467 return retval;
2468 2466
2469 if (wufc) 2467 if (wufc)
2470 if (atl1c_phy_power_saving(hw) != 0) 2468 if (atl1c_phy_power_saving(hw) != 0)
@@ -2525,12 +2523,8 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
2525 AT_WRITE_REG(hw, REG_WOL_CTRL, wol_ctrl_data); 2523 AT_WRITE_REG(hw, REG_WOL_CTRL, wol_ctrl_data);
2526 AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data); 2524 AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data);
2527 2525
2528 /* pcie patch */
2529 device_set_wakeup_enable(&pdev->dev, 1);
2530
2531 AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_DEFAULT | 2526 AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_DEFAULT |
2532 GPHY_CTRL_EXT_RESET); 2527 GPHY_CTRL_EXT_RESET);
2533 pci_prepare_to_sleep(pdev);
2534 } else { 2528 } else {
2535 AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_POWER_SAVING); 2529 AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_POWER_SAVING);
2536 master_ctrl_data |= MASTER_CTRL_CLK_SEL_DIS; 2530 master_ctrl_data |= MASTER_CTRL_CLK_SEL_DIS;
@@ -2540,25 +2534,17 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
2540 AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data); 2534 AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data);
2541 AT_WRITE_REG(hw, REG_WOL_CTRL, 0); 2535 AT_WRITE_REG(hw, REG_WOL_CTRL, 0);
2542 hw->phy_configured = false; /* re-init PHY when resume */ 2536 hw->phy_configured = false; /* re-init PHY when resume */
2543 pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
2544 } 2537 }
2545 2538
2546 pci_disable_device(pdev);
2547 pci_set_power_state(pdev, pci_choose_state(pdev, state));
2548
2549 return 0; 2539 return 0;
2550} 2540}
2551 2541
2552static int atl1c_resume(struct pci_dev *pdev) 2542static int atl1c_resume(struct device *dev)
2553{ 2543{
2544 struct pci_dev *pdev = to_pci_dev(dev);
2554 struct net_device *netdev = pci_get_drvdata(pdev); 2545 struct net_device *netdev = pci_get_drvdata(pdev);
2555 struct atl1c_adapter *adapter = netdev_priv(netdev); 2546 struct atl1c_adapter *adapter = netdev_priv(netdev);
2556 2547
2557 pci_set_power_state(pdev, PCI_D0);
2558 pci_restore_state(pdev);
2559 pci_enable_wake(pdev, PCI_D3hot, 0);
2560 pci_enable_wake(pdev, PCI_D3cold, 0);
2561
2562 AT_WRITE_REG(&adapter->hw, REG_WOL_CTRL, 0); 2548 AT_WRITE_REG(&adapter->hw, REG_WOL_CTRL, 0);
2563 atl1c_reset_pcie(&adapter->hw, ATL1C_PCIE_L0S_L1_DISABLE | 2549 atl1c_reset_pcie(&adapter->hw, ATL1C_PCIE_L0S_L1_DISABLE |
2564 ATL1C_PCIE_PHY_RESET); 2550 ATL1C_PCIE_PHY_RESET);
@@ -2582,7 +2568,12 @@ static int atl1c_resume(struct pci_dev *pdev)
2582 2568
2583static void atl1c_shutdown(struct pci_dev *pdev) 2569static void atl1c_shutdown(struct pci_dev *pdev)
2584{ 2570{
2585 atl1c_suspend(pdev, PMSG_SUSPEND); 2571 struct net_device *netdev = pci_get_drvdata(pdev);
2572 struct atl1c_adapter *adapter = netdev_priv(netdev);
2573
2574 atl1c_suspend(&pdev->dev);
2575 pci_wake_from_d3(pdev, adapter->wol);
2576 pci_set_power_state(pdev, PCI_D3hot);
2586} 2577}
2587 2578
2588static const struct net_device_ops atl1c_netdev_ops = { 2579static const struct net_device_ops atl1c_netdev_ops = {
@@ -2886,16 +2877,16 @@ static struct pci_error_handlers atl1c_err_handler = {
2886 .resume = atl1c_io_resume, 2877 .resume = atl1c_io_resume,
2887}; 2878};
2888 2879
2880static SIMPLE_DEV_PM_OPS(atl1c_pm_ops, atl1c_suspend, atl1c_resume);
2881
2889static struct pci_driver atl1c_driver = { 2882static struct pci_driver atl1c_driver = {
2890 .name = atl1c_driver_name, 2883 .name = atl1c_driver_name,
2891 .id_table = atl1c_pci_tbl, 2884 .id_table = atl1c_pci_tbl,
2892 .probe = atl1c_probe, 2885 .probe = atl1c_probe,
2893 .remove = __devexit_p(atl1c_remove), 2886 .remove = __devexit_p(atl1c_remove),
2894 /* Power Managment Hooks */
2895 .suspend = atl1c_suspend,
2896 .resume = atl1c_resume,
2897 .shutdown = atl1c_shutdown, 2887 .shutdown = atl1c_shutdown,
2898 .err_handler = &atl1c_err_handler 2888 .err_handler = &atl1c_err_handler,
2889 .driver.pm = &atl1c_pm_ops,
2899}; 2890};
2900 2891
2901/* 2892/*
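
The atl1c conversion is the standard legacy-PM-to-dev_pm_ops migration: the callbacks take a struct device, the pci_save_state()/pci_set_power_state()/pci_enable_wake() boilerplate moves into the PCI core, and the driver publishes its ops through the driver model. In outline:

    static int atl1c_suspend(struct device *dev)
    {
            struct pci_dev *pdev = to_pci_dev(dev);
            /* quiesce the NIC and program WoL; no state save or
             * power-state handling in the driver any more */
            return 0;
    }

    static SIMPLE_DEV_PM_OPS(atl1c_pm_ops, atl1c_suspend, atl1c_resume);

    static struct pci_driver atl1c_driver = {
            /* .suspend/.resume removed in favour of: */
            .driver.pm = &atl1c_pm_ops,
    };

Shutdown is the one path that still does its own pci_wake_from_d3()/pci_set_power_state(), since the PM core is not involved there.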
diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 53363108994e..3acf5123a6ef 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -3504,6 +3504,8 @@ static int atl1_set_ringparam(struct net_device *netdev,
3504 struct atl1_rfd_ring rfd_old, rfd_new; 3504 struct atl1_rfd_ring rfd_old, rfd_new;
3505 struct atl1_rrd_ring rrd_old, rrd_new; 3505 struct atl1_rrd_ring rrd_old, rrd_new;
3506 struct atl1_ring_header rhdr_old, rhdr_new; 3506 struct atl1_ring_header rhdr_old, rhdr_new;
3507 struct atl1_smb smb;
3508 struct atl1_cmb cmb;
3507 int err; 3509 int err;
3508 3510
3509 tpd_old = adapter->tpd_ring; 3511 tpd_old = adapter->tpd_ring;
@@ -3544,11 +3546,19 @@ static int atl1_set_ringparam(struct net_device *netdev,
3544 adapter->rrd_ring = rrd_old; 3546 adapter->rrd_ring = rrd_old;
3545 adapter->tpd_ring = tpd_old; 3547 adapter->tpd_ring = tpd_old;
3546 adapter->ring_header = rhdr_old; 3548 adapter->ring_header = rhdr_old;
3549 /*
3550 * Save SMB and CMB, since atl1_free_ring_resources
3551 * will clear them.
3552 */
3553 smb = adapter->smb;
3554 cmb = adapter->cmb;
3547 atl1_free_ring_resources(adapter); 3555 atl1_free_ring_resources(adapter);
3548 adapter->rfd_ring = rfd_new; 3556 adapter->rfd_ring = rfd_new;
3549 adapter->rrd_ring = rrd_new; 3557 adapter->rrd_ring = rrd_new;
3550 adapter->tpd_ring = tpd_new; 3558 adapter->tpd_ring = tpd_new;
3551 adapter->ring_header = rhdr_new; 3559 adapter->ring_header = rhdr_new;
3560 adapter->smb = smb;
3561 adapter->cmb = cmb;
3552 3562
3553 err = atl1_up(adapter); 3563 err = atl1_up(adapter);
3554 if (err) 3564 if (err)
diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h
index 4594a28b1f66..d64313b7090e 100644
--- a/drivers/net/benet/be.h
+++ b/drivers/net/benet/be.h
@@ -234,7 +234,7 @@ struct be_adapter {
234 u8 __iomem *db; /* Door Bell */ 234 u8 __iomem *db; /* Door Bell */
235 u8 __iomem *pcicfg; /* PCI config space */ 235 u8 __iomem *pcicfg; /* PCI config space */
236 236
237 spinlock_t mbox_lock; /* For serializing mbox cmds to BE card */ 237 struct mutex mbox_lock; /* For serializing mbox cmds to BE card */
238 struct be_dma_mem mbox_mem; 238 struct be_dma_mem mbox_mem;
239 /* Mbox mem is adjusted to align to 16 bytes. The allocated addr 239 /* Mbox mem is adjusted to align to 16 bytes. The allocated addr
240 * is stored for freeing purpose */ 240 * is stored for freeing purpose */
diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index e4465d222a7d..1c8c79c9d214 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -462,7 +462,8 @@ int be_cmd_fw_init(struct be_adapter *adapter)
462 u8 *wrb; 462 u8 *wrb;
463 int status; 463 int status;
464 464
465 spin_lock(&adapter->mbox_lock); 465 if (mutex_lock_interruptible(&adapter->mbox_lock))
466 return -1;
466 467
467 wrb = (u8 *)wrb_from_mbox(adapter); 468 wrb = (u8 *)wrb_from_mbox(adapter);
468 *wrb++ = 0xFF; 469 *wrb++ = 0xFF;
@@ -476,7 +477,7 @@ int be_cmd_fw_init(struct be_adapter *adapter)
476 477
477 status = be_mbox_notify_wait(adapter); 478 status = be_mbox_notify_wait(adapter);
478 479
479 spin_unlock(&adapter->mbox_lock); 480 mutex_unlock(&adapter->mbox_lock);
480 return status; 481 return status;
481} 482}
482 483
@@ -491,7 +492,8 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
491 if (adapter->eeh_err) 492 if (adapter->eeh_err)
492 return -EIO; 493 return -EIO;
493 494
494 spin_lock(&adapter->mbox_lock); 495 if (mutex_lock_interruptible(&adapter->mbox_lock))
496 return -1;
495 497
496 wrb = (u8 *)wrb_from_mbox(adapter); 498 wrb = (u8 *)wrb_from_mbox(adapter);
497 *wrb++ = 0xFF; 499 *wrb++ = 0xFF;
@@ -505,7 +507,7 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
505 507
506 status = be_mbox_notify_wait(adapter); 508 status = be_mbox_notify_wait(adapter);
507 509
508 spin_unlock(&adapter->mbox_lock); 510 mutex_unlock(&adapter->mbox_lock);
509 return status; 511 return status;
510} 512}
511int be_cmd_eq_create(struct be_adapter *adapter, 513int be_cmd_eq_create(struct be_adapter *adapter,
@@ -516,7 +518,8 @@ int be_cmd_eq_create(struct be_adapter *adapter,
516 struct be_dma_mem *q_mem = &eq->dma_mem; 518 struct be_dma_mem *q_mem = &eq->dma_mem;
517 int status; 519 int status;
518 520
519 spin_lock(&adapter->mbox_lock); 521 if (mutex_lock_interruptible(&adapter->mbox_lock))
522 return -1;
520 523
521 wrb = wrb_from_mbox(adapter); 524 wrb = wrb_from_mbox(adapter);
522 req = embedded_payload(wrb); 525 req = embedded_payload(wrb);
@@ -546,7 +549,7 @@ int be_cmd_eq_create(struct be_adapter *adapter,
546 eq->created = true; 549 eq->created = true;
547 } 550 }
548 551
549 spin_unlock(&adapter->mbox_lock); 552 mutex_unlock(&adapter->mbox_lock);
550 return status; 553 return status;
551} 554}
552 555
@@ -558,7 +561,8 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
558 struct be_cmd_req_mac_query *req; 561 struct be_cmd_req_mac_query *req;
559 int status; 562 int status;
560 563
561 spin_lock(&adapter->mbox_lock); 564 if (mutex_lock_interruptible(&adapter->mbox_lock))
565 return -1;
562 566
563 wrb = wrb_from_mbox(adapter); 567 wrb = wrb_from_mbox(adapter);
564 req = embedded_payload(wrb); 568 req = embedded_payload(wrb);
@@ -583,7 +587,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
583 memcpy(mac_addr, resp->mac.addr, ETH_ALEN); 587 memcpy(mac_addr, resp->mac.addr, ETH_ALEN);
584 } 588 }
585 589
586 spin_unlock(&adapter->mbox_lock); 590 mutex_unlock(&adapter->mbox_lock);
587 return status; 591 return status;
588} 592}
589 593
@@ -667,7 +671,8 @@ int be_cmd_cq_create(struct be_adapter *adapter,
667 void *ctxt; 671 void *ctxt;
668 int status; 672 int status;
669 673
670 spin_lock(&adapter->mbox_lock); 674 if (mutex_lock_interruptible(&adapter->mbox_lock))
675 return -1;
671 676
672 wrb = wrb_from_mbox(adapter); 677 wrb = wrb_from_mbox(adapter);
673 req = embedded_payload(wrb); 678 req = embedded_payload(wrb);
@@ -701,7 +706,7 @@ int be_cmd_cq_create(struct be_adapter *adapter,
701 cq->created = true; 706 cq->created = true;
702 } 707 }
703 708
704 spin_unlock(&adapter->mbox_lock); 709 mutex_unlock(&adapter->mbox_lock);
705 710
706 return status; 711 return status;
707} 712}
@@ -724,7 +729,8 @@ int be_cmd_mccq_create(struct be_adapter *adapter,
724 void *ctxt; 729 void *ctxt;
725 int status; 730 int status;
726 731
727 spin_lock(&adapter->mbox_lock); 732 if (mutex_lock_interruptible(&adapter->mbox_lock))
733 return -1;
728 734
729 wrb = wrb_from_mbox(adapter); 735 wrb = wrb_from_mbox(adapter);
730 req = embedded_payload(wrb); 736 req = embedded_payload(wrb);
@@ -754,7 +760,7 @@ int be_cmd_mccq_create(struct be_adapter *adapter,
754 mccq->id = le16_to_cpu(resp->id); 760 mccq->id = le16_to_cpu(resp->id);
755 mccq->created = true; 761 mccq->created = true;
756 } 762 }
757 spin_unlock(&adapter->mbox_lock); 763 mutex_unlock(&adapter->mbox_lock);
758 764
759 return status; 765 return status;
760} 766}
@@ -769,7 +775,8 @@ int be_cmd_txq_create(struct be_adapter *adapter,
769 void *ctxt; 775 void *ctxt;
770 int status; 776 int status;
771 777
772 spin_lock(&adapter->mbox_lock); 778 if (mutex_lock_interruptible(&adapter->mbox_lock))
779 return -1;
773 780
774 wrb = wrb_from_mbox(adapter); 781 wrb = wrb_from_mbox(adapter);
775 req = embedded_payload(wrb); 782 req = embedded_payload(wrb);
@@ -801,7 +808,7 @@ int be_cmd_txq_create(struct be_adapter *adapter,
801 txq->created = true; 808 txq->created = true;
802 } 809 }
803 810
804 spin_unlock(&adapter->mbox_lock); 811 mutex_unlock(&adapter->mbox_lock);
805 812
806 return status; 813 return status;
807} 814}
@@ -816,7 +823,8 @@ int be_cmd_rxq_create(struct be_adapter *adapter,
816 struct be_dma_mem *q_mem = &rxq->dma_mem; 823 struct be_dma_mem *q_mem = &rxq->dma_mem;
817 int status; 824 int status;
818 825
819 spin_lock(&adapter->mbox_lock); 826 if (mutex_lock_interruptible(&adapter->mbox_lock))
827 return -1;
820 828
821 wrb = wrb_from_mbox(adapter); 829 wrb = wrb_from_mbox(adapter);
822 req = embedded_payload(wrb); 830 req = embedded_payload(wrb);
@@ -843,7 +851,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter,
843 *rss_id = resp->rss_id; 851 *rss_id = resp->rss_id;
844 } 852 }
845 853
846 spin_unlock(&adapter->mbox_lock); 854 mutex_unlock(&adapter->mbox_lock);
847 855
848 return status; 856 return status;
849} 857}
@@ -862,7 +870,8 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
862 if (adapter->eeh_err) 870 if (adapter->eeh_err)
863 return -EIO; 871 return -EIO;
864 872
865 spin_lock(&adapter->mbox_lock); 873 if (mutex_lock_interruptible(&adapter->mbox_lock))
874 return -1;
866 875
867 wrb = wrb_from_mbox(adapter); 876 wrb = wrb_from_mbox(adapter);
868 req = embedded_payload(wrb); 877 req = embedded_payload(wrb);
@@ -899,7 +908,7 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
899 908
900 status = be_mbox_notify_wait(adapter); 909 status = be_mbox_notify_wait(adapter);
901 910
902 spin_unlock(&adapter->mbox_lock); 911 mutex_unlock(&adapter->mbox_lock);
903 912
904 return status; 913 return status;
905} 914}
@@ -915,7 +924,8 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
915 struct be_cmd_req_if_create *req; 924 struct be_cmd_req_if_create *req;
916 int status; 925 int status;
917 926
918 spin_lock(&adapter->mbox_lock); 927 if (mutex_lock_interruptible(&adapter->mbox_lock))
928 return -1;
919 929
920 wrb = wrb_from_mbox(adapter); 930 wrb = wrb_from_mbox(adapter);
921 req = embedded_payload(wrb); 931 req = embedded_payload(wrb);
@@ -941,7 +951,7 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
941 *pmac_id = le32_to_cpu(resp->pmac_id); 951 *pmac_id = le32_to_cpu(resp->pmac_id);
942 } 952 }
943 953
944 spin_unlock(&adapter->mbox_lock); 954 mutex_unlock(&adapter->mbox_lock);
945 return status; 955 return status;
946} 956}
947 957
@@ -955,7 +965,8 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id)
955 if (adapter->eeh_err) 965 if (adapter->eeh_err)
956 return -EIO; 966 return -EIO;
957 967
958 spin_lock(&adapter->mbox_lock); 968 if (mutex_lock_interruptible(&adapter->mbox_lock))
969 return -1;
959 970
960 wrb = wrb_from_mbox(adapter); 971 wrb = wrb_from_mbox(adapter);
961 req = embedded_payload(wrb); 972 req = embedded_payload(wrb);
@@ -970,7 +981,7 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id)
970 981
971 status = be_mbox_notify_wait(adapter); 982 status = be_mbox_notify_wait(adapter);
972 983
973 spin_unlock(&adapter->mbox_lock); 984 mutex_unlock(&adapter->mbox_lock);
974 985
975 return status; 986 return status;
976} 987}
@@ -1060,7 +1071,8 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter, char *fw_ver)
1060 struct be_cmd_req_get_fw_version *req; 1071 struct be_cmd_req_get_fw_version *req;
1061 int status; 1072 int status;
1062 1073
1063 spin_lock(&adapter->mbox_lock); 1074 if (mutex_lock_interruptible(&adapter->mbox_lock))
1075 return -1;
1064 1076
1065 wrb = wrb_from_mbox(adapter); 1077 wrb = wrb_from_mbox(adapter);
1066 req = embedded_payload(wrb); 1078 req = embedded_payload(wrb);
@@ -1077,7 +1089,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter, char *fw_ver)
1077 strncpy(fw_ver, resp->firmware_version_string, FW_VER_LEN); 1089 strncpy(fw_ver, resp->firmware_version_string, FW_VER_LEN);
1078 } 1090 }
1079 1091
1080 spin_unlock(&adapter->mbox_lock); 1092 mutex_unlock(&adapter->mbox_lock);
1081 return status; 1093 return status;
1082} 1094}
1083 1095
@@ -1322,7 +1334,8 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
1322 struct be_cmd_req_query_fw_cfg *req; 1334 struct be_cmd_req_query_fw_cfg *req;
1323 int status; 1335 int status;
1324 1336
1325 spin_lock(&adapter->mbox_lock); 1337 if (mutex_lock_interruptible(&adapter->mbox_lock))
1338 return -1;
1326 1339
1327 wrb = wrb_from_mbox(adapter); 1340 wrb = wrb_from_mbox(adapter);
1328 req = embedded_payload(wrb); 1341 req = embedded_payload(wrb);
@@ -1341,7 +1354,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
1341 *caps = le32_to_cpu(resp->function_caps); 1354 *caps = le32_to_cpu(resp->function_caps);
1342 } 1355 }
1343 1356
1344 spin_unlock(&adapter->mbox_lock); 1357 mutex_unlock(&adapter->mbox_lock);
1345 return status; 1358 return status;
1346} 1359}
1347 1360
@@ -1352,7 +1365,8 @@ int be_cmd_reset_function(struct be_adapter *adapter)
1352 struct be_cmd_req_hdr *req; 1365 struct be_cmd_req_hdr *req;
1353 int status; 1366 int status;
1354 1367
1355 spin_lock(&adapter->mbox_lock); 1368 if (mutex_lock_interruptible(&adapter->mbox_lock))
1369 return -1;
1356 1370
1357 wrb = wrb_from_mbox(adapter); 1371 wrb = wrb_from_mbox(adapter);
1358 req = embedded_payload(wrb); 1372 req = embedded_payload(wrb);
@@ -1365,7 +1379,7 @@ int be_cmd_reset_function(struct be_adapter *adapter)
1365 1379
1366 status = be_mbox_notify_wait(adapter); 1380 status = be_mbox_notify_wait(adapter);
1367 1381
1368 spin_unlock(&adapter->mbox_lock); 1382 mutex_unlock(&adapter->mbox_lock);
1369 return status; 1383 return status;
1370} 1384}
1371 1385
@@ -1376,7 +1390,8 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, u16 table_size)
1376 u32 myhash[10]; 1390 u32 myhash[10];
1377 int status; 1391 int status;
1378 1392
1379 spin_lock(&adapter->mbox_lock); 1393 if (mutex_lock_interruptible(&adapter->mbox_lock))
1394 return -1;
1380 1395
1381 wrb = wrb_from_mbox(adapter); 1396 wrb = wrb_from_mbox(adapter);
1382 req = embedded_payload(wrb); 1397 req = embedded_payload(wrb);
@@ -1396,7 +1411,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, u16 table_size)
1396 1411
1397 status = be_mbox_notify_wait(adapter); 1412 status = be_mbox_notify_wait(adapter);
1398 1413
1399 spin_unlock(&adapter->mbox_lock); 1414 mutex_unlock(&adapter->mbox_lock);
1400 return status; 1415 return status;
1401} 1416}
1402 1417
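
All of the be_cmds.c hunks are one change: the mailbox lock becomes a mutex (see the be.h hunk above) because be_mbox_notify_wait() sleeps while polling the card, which a spinlock cannot tolerate. Every mbox command now opens and closes with the same pattern:

    if (mutex_lock_interruptible(&adapter->mbox_lock))
            return -1;                      /* interrupted by a signal */
    wrb = wrb_from_mbox(adapter);
    /* ... build the request; be_mbox_notify_wait() may sleep ... */
    mutex_unlock(&adapter->mbox_lock);
    return status;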
diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 93354eee2cfd..fd251b59b7f9 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -2677,7 +2677,7 @@ static int be_ctrl_init(struct be_adapter *adapter)
2677 } 2677 }
2678 memset(mc_cmd_mem->va, 0, mc_cmd_mem->size); 2678 memset(mc_cmd_mem->va, 0, mc_cmd_mem->size);
2679 2679
2680 spin_lock_init(&adapter->mbox_lock); 2680 mutex_init(&adapter->mbox_lock);
2681 spin_lock_init(&adapter->mcc_lock); 2681 spin_lock_init(&adapter->mcc_lock);
2682 spin_lock_init(&adapter->mcc_cq_lock); 2682 spin_lock_init(&adapter->mcc_cq_lock);
2683 2683
diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
index 121b073a6c3f..84fbd4ebd778 100644
--- a/drivers/net/bonding/bond_ipv6.c
+++ b/drivers/net/bonding/bond_ipv6.c
@@ -88,7 +88,12 @@ static void bond_na_send(struct net_device *slave_dev,
88 } 88 }
89 89
90 if (vlan_id) { 90 if (vlan_id) {
91 skb = vlan_put_tag(skb, vlan_id); 91 /* The Ethernet header is not present yet, so it is
92 * too early to insert a VLAN tag. Force use of an
93 * out-of-line tag here and let dev_hard_start_xmit()
94 * insert it if the slave hardware can't.
95 */
96 skb = __vlan_hwaccel_put_tag(skb, vlan_id);
92 if (!skb) { 97 if (!skb) {
93 pr_err("failed to insert VLAN tag\n"); 98 pr_err("failed to insert VLAN tag\n");
94 return; 99 return;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d0ea760ce419..3b16c34ed86e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -418,36 +418,11 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
418 * @bond: bond device that got this skb for tx. 418 * @bond: bond device that got this skb for tx.
419 * @skb: hw accel VLAN tagged skb to transmit 419 * @skb: hw accel VLAN tagged skb to transmit
420 * @slave_dev: slave that is supposed to xmit this skbuff 420 * @slave_dev: slave that is supposed to xmit this skbuff
421 *
422 * When the bond gets an skb to transmit that is
423 * already hardware accelerated VLAN tagged, and it
424 * needs to relay this skb to a slave that is not
425 * hw accel capable, the skb needs to be "unaccelerated",
426 * i.e. strip the hwaccel tag and re-insert it as part
427 * of the payload.
428 */ 421 */
429int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, 422int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
430 struct net_device *slave_dev) 423 struct net_device *slave_dev)
431{ 424{
432 unsigned short uninitialized_var(vlan_id); 425 skb->dev = slave_dev;
433
434 /* Test vlan_list not vlgrp to catch and handle 802.1p tags */
435 if (!list_empty(&bond->vlan_list) &&
436 !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
437 vlan_get_tag(skb, &vlan_id) == 0) {
438 skb->dev = slave_dev;
439 skb = vlan_put_tag(skb, vlan_id);
440 if (!skb) {
441 /* vlan_put_tag() frees the skb in case of error,
442 * so return success here so the calling functions
443 * won't attempt to free is again.
444 */
445 return 0;
446 }
447 } else {
448 skb->dev = slave_dev;
449 }
450
451 skb->priority = 1; 426 skb->priority = 1;
452#ifdef CONFIG_NET_POLL_CONTROLLER 427#ifdef CONFIG_NET_POLL_CONTROLLER
453 if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) { 428 if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
@@ -1203,11 +1178,13 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1203 bond_do_fail_over_mac(bond, new_active, 1178 bond_do_fail_over_mac(bond, new_active,
1204 old_active); 1179 old_active);
1205 1180
1206 bond->send_grat_arp = bond->params.num_grat_arp; 1181 if (netif_running(bond->dev)) {
1207 bond_send_gratuitous_arp(bond); 1182 bond->send_grat_arp = bond->params.num_grat_arp;
1183 bond_send_gratuitous_arp(bond);
1208 1184
1209 bond->send_unsol_na = bond->params.num_unsol_na; 1185 bond->send_unsol_na = bond->params.num_unsol_na;
1210 bond_send_unsolicited_na(bond); 1186 bond_send_unsolicited_na(bond);
1187 }
1211 1188
1212 write_unlock_bh(&bond->curr_slave_lock); 1189 write_unlock_bh(&bond->curr_slave_lock);
1213 read_unlock(&bond->lock); 1190 read_unlock(&bond->lock);
@@ -1221,8 +1198,9 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
1221 1198
1222 /* resend IGMP joins since active slave has changed or 1199 /* resend IGMP joins since active slave has changed or
1223 * all were sent on curr_active_slave */ 1200 * all were sent on curr_active_slave */
1224 if ((USES_PRIMARY(bond->params.mode) && new_active) || 1201 if (((USES_PRIMARY(bond->params.mode) && new_active) ||
1225 bond->params.mode == BOND_MODE_ROUNDROBIN) { 1202 bond->params.mode == BOND_MODE_ROUNDROBIN) &&
1203 netif_running(bond->dev)) {
1226 bond->igmp_retrans = bond->params.resend_igmp; 1204 bond->igmp_retrans = bond->params.resend_igmp;
1227 queue_delayed_work(bond->wq, &bond->mcast_work, 0); 1205 queue_delayed_work(bond->wq, &bond->mcast_work, 0);
1228 } 1206 }
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c2f081352a03..4feeb2d650a4 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -269,11 +269,11 @@ static inline struct slave *bond_get_slave_by_dev(struct bonding *bond, struct n
269 269
270 bond_for_each_slave(bond, slave, i) { 270 bond_for_each_slave(bond, slave, i) {
271 if (slave->dev == slave_dev) { 271 if (slave->dev == slave_dev) {
272 break; 272 return slave;
273 } 273 }
274 } 274 }
275 275
276 return slave; 276 return 0;
277} 277}
278 278
279static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) 279static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index 92bac19ad60a..6dff32196c92 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -940,7 +940,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
940 &udev->l2_ring_map, 940 &udev->l2_ring_map,
941 GFP_KERNEL | __GFP_COMP); 941 GFP_KERNEL | __GFP_COMP);
942 if (!udev->l2_ring) 942 if (!udev->l2_ring)
943 return -ENOMEM; 943 goto err_udev;
944 944
945 udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size; 945 udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
946 udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size); 946 udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size);
@@ -948,7 +948,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
948 &udev->l2_buf_map, 948 &udev->l2_buf_map,
949 GFP_KERNEL | __GFP_COMP); 949 GFP_KERNEL | __GFP_COMP);
950 if (!udev->l2_buf) 950 if (!udev->l2_buf)
951 return -ENOMEM; 951 goto err_dma;
952 952
953 write_lock(&cnic_dev_lock); 953 write_lock(&cnic_dev_lock);
954 list_add(&udev->list, &cnic_udev_list); 954 list_add(&udev->list, &cnic_udev_list);
@@ -959,6 +959,12 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
959 cp->udev = udev; 959 cp->udev = udev;
960 960
961 return 0; 961 return 0;
962 err_dma:
963 dma_free_coherent(&udev->pdev->dev, udev->l2_ring_size,
964 udev->l2_ring, udev->l2_ring_map);
965 err_udev:
966 kfree(udev);
967 return -ENOMEM;
962} 968}
963 969
964static int cnic_init_uio(struct cnic_dev *dev) 970static int cnic_init_uio(struct cnic_dev *dev)
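
The cnic change converts two early returns into the kernel's usual goto-unwind ladder, so that a failure of the second allocation releases what the first one acquired (and the udev allocation is no longer leaked on either path). The shape, generically (names hypothetical):

    a = alloc_first();
    if (!a)
            goto err_out;
    b = alloc_second();
    if (!b)
            goto err_free_a;        /* must undo the first step */
    return 0;
    err_free_a:
            free_first(a);
    err_out:
            kfree(ctx);
            return -ENOMEM;

The labels are ordered so that a failure at step N falls through every cleanup for steps N-1 down to 1.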
diff --git a/drivers/net/ehea/ehea_ethtool.c b/drivers/net/ehea/ehea_ethtool.c
index 1f37ee6b2a26..d6cf502906cf 100644
--- a/drivers/net/ehea/ehea_ethtool.c
+++ b/drivers/net/ehea/ehea_ethtool.c
@@ -263,6 +263,13 @@ static void ehea_get_ethtool_stats(struct net_device *dev,
263 263
264static int ehea_set_flags(struct net_device *dev, u32 data) 264static int ehea_set_flags(struct net_device *dev, u32 data)
265{ 265{
266 /* Avoid changing the VLAN flags */
267 if ((data & (ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN)) !=
268 (ethtool_op_get_flags(dev) & (ETH_FLAG_RXVLAN |
269 ETH_FLAG_TXVLAN))){
270 return -EINVAL;
271 }
272
266 return ethtool_op_set_flags(dev, data, ETH_FLAG_LRO 273 return ethtool_op_set_flags(dev, data, ETH_FLAG_LRO
267 | ETH_FLAG_TXVLAN 274 | ETH_FLAG_TXVLAN
268 | ETH_FLAG_RXVLAN); 275 | ETH_FLAG_RXVLAN);
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index aa56963ad558..c353bf3113cc 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -935,7 +935,7 @@ static void epic_init_ring(struct net_device *dev)
935 935
936 /* Fill in the Rx buffers. Handle allocation failure gracefully. */ 936 /* Fill in the Rx buffers. Handle allocation failure gracefully. */
937 for (i = 0; i < RX_RING_SIZE; i++) { 937 for (i = 0; i < RX_RING_SIZE; i++) {
938 struct sk_buff *skb = dev_alloc_skb(ep->rx_buf_sz); 938 struct sk_buff *skb = dev_alloc_skb(ep->rx_buf_sz + 2);
939 ep->rx_skbuff[i] = skb; 939 ep->rx_skbuff[i] = skb;
940 if (skb == NULL) 940 if (skb == NULL)
941 break; 941 break;
@@ -1233,7 +1233,7 @@ static int epic_rx(struct net_device *dev, int budget)
1233 entry = ep->dirty_rx % RX_RING_SIZE; 1233 entry = ep->dirty_rx % RX_RING_SIZE;
1234 if (ep->rx_skbuff[entry] == NULL) { 1234 if (ep->rx_skbuff[entry] == NULL) {
1235 struct sk_buff *skb; 1235 struct sk_buff *skb;
1236 skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz); 1236 skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz + 2);
1237 if (skb == NULL) 1237 if (skb == NULL)
1238 break; 1238 break;
1239 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ 1239 skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */
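
epic100, and hamachi and sundance below, all had the same off-by-two: the ring refill paths allocate exactly rx_buf_sz and then skb_reserve(skb, 2), leaving the buffer two bytes short for a full-sized frame. The corrected refill pattern:

    skb = dev_alloc_skb(rx_buf_sz + 2);     /* room for the reserve below */
    if (skb) {
            skb_reserve(skb, 2);    /* 14-byte Ethernet header + 2 puts the
                                     * IP header on a 16-byte boundary */
            /* map skb->data for DMA and post it to the Rx ring */
    }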
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index 9a6485892b3d..80d25ed53344 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1202,7 +1202,7 @@ static void hamachi_init_ring(struct net_device *dev)
1202 } 1202 }
1203 /* Fill in the Rx buffers. Handle allocation failure gracefully. */ 1203 /* Fill in the Rx buffers. Handle allocation failure gracefully. */
1204 for (i = 0; i < RX_RING_SIZE; i++) { 1204 for (i = 0; i < RX_RING_SIZE; i++) {
1205 struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz); 1205 struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz + 2);
1206 hmp->rx_skbuff[i] = skb; 1206 hmp->rx_skbuff[i] = skb;
1207 if (skb == NULL) 1207 if (skb == NULL)
1208 break; 1208 break;
@@ -1669,7 +1669,7 @@ static int hamachi_rx(struct net_device *dev)
1669 entry = hmp->dirty_rx % RX_RING_SIZE; 1669 entry = hmp->dirty_rx % RX_RING_SIZE;
1670 desc = &(hmp->rx_ring[entry]); 1670 desc = &(hmp->rx_ring[entry]);
1671 if (hmp->rx_skbuff[entry] == NULL) { 1671 if (hmp->rx_skbuff[entry] == NULL) {
1672 struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz); 1672 struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz + 2);
1673 1673
1674 hmp->rx_skbuff[entry] = skb; 1674 hmp->rx_skbuff[entry] = skb;
1675 if (skb == NULL) 1675 if (skb == NULL)
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 8a4d19e5de06..f1047dd8a526 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -690,6 +690,7 @@ static void block_output(struct net_device *dev, int count,
690static struct pcmcia_device_id axnet_ids[] = { 690static struct pcmcia_device_id axnet_ids[] = {
691 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x016c, 0x0081), 691 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x016c, 0x0081),
692 PCMCIA_DEVICE_MANF_CARD(0x018a, 0x0301), 692 PCMCIA_DEVICE_MANF_CARD(0x018a, 0x0301),
693 PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x2328),
693 PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0301), 694 PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0301),
694 PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0303), 695 PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0303),
695 PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0309), 696 PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0309),
diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c
index d05c44692f08..2c158910f7ea 100644
--- a/drivers/net/pcmcia/pcnet_cs.c
+++ b/drivers/net/pcmcia/pcnet_cs.c
@@ -1493,7 +1493,6 @@ static struct pcmcia_device_id pcnet_ids[] = {
1493 PCMCIA_DEVICE_MANF_CARD(0x0149, 0x4530), 1493 PCMCIA_DEVICE_MANF_CARD(0x0149, 0x4530),
1494 PCMCIA_DEVICE_MANF_CARD(0x0149, 0xc1ab), 1494 PCMCIA_DEVICE_MANF_CARD(0x0149, 0xc1ab),
1495 PCMCIA_DEVICE_MANF_CARD(0x0186, 0x0110), 1495 PCMCIA_DEVICE_MANF_CARD(0x0186, 0x0110),
1496 PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x2328),
1497 PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x8041), 1496 PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x8041),
1498 PCMCIA_DEVICE_MANF_CARD(0x0213, 0x2452), 1497 PCMCIA_DEVICE_MANF_CARD(0x0213, 0x2452),
1499 PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0300), 1498 PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0300),
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 39659976a1ac..89294b43c4a9 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1285,6 +1285,11 @@ ppp_push(struct ppp *ppp)
1285} 1285}
1286 1286
1287#ifdef CONFIG_PPP_MULTILINK 1287#ifdef CONFIG_PPP_MULTILINK
1288static bool mp_protocol_compress __read_mostly = true;
1289module_param(mp_protocol_compress, bool, S_IRUGO | S_IWUSR);
1290MODULE_PARM_DESC(mp_protocol_compress,
1291 "compress protocol id in multilink fragments");
1292
1288/* 1293/*
1289 * Divide a packet to be transmitted into fragments and 1294 * Divide a packet to be transmitted into fragments and
1290 * send them out the individual links. 1295 * send them out the individual links.
@@ -1347,10 +1352,10 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
1347 if (nfree == 0 || nfree < navail / 2) 1352 if (nfree == 0 || nfree < navail / 2)
1348 return 0; /* can't take now, leave it in xmit_pending */ 1353 return 0; /* can't take now, leave it in xmit_pending */
1349 1354
1350 /* Do protocol field compression (XXX this should be optional) */ 1355 /* Do protocol field compression */
1351 p = skb->data; 1356 p = skb->data;
1352 len = skb->len; 1357 len = skb->len;
1353 if (*p == 0) { 1358 if (*p == 0 && mp_protocol_compress) {
1354 ++p; 1359 ++p;
1355 --len; 1360 --len;
1356 } 1361 }
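
mp_protocol_compress defaults to true, so existing setups keep compressing the protocol field in multilink fragments; a peer that cannot parse compressed IDs can be accommodated at load time (modprobe ppp_generic mp_protocol_compress=0, when built as a module) or, since the parameter is S_IWUSR, at runtime through /sys/module/ppp_generic/parameters/mp_protocol_compress.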
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 0a66fed52e8e..16c62659cdd9 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -412,7 +412,7 @@ static int skfp_driver_init(struct net_device *dev)
412 bp->SharedMemAddr = pci_alloc_consistent(&bp->pdev, 412 bp->SharedMemAddr = pci_alloc_consistent(&bp->pdev,
413 bp->SharedMemSize, 413 bp->SharedMemSize,
414 &bp->SharedMemDMA); 414 &bp->SharedMemDMA);
415 if (!bp->SharedMemSize) { 415 if (!bp->SharedMemAddr) {
416 printk("could not allocate mem for "); 416 printk("could not allocate mem for ");
417 printk("hardware module: %ld byte\n", 417 printk("hardware module: %ld byte\n",
418 bp->SharedMemSize); 418 bp->SharedMemSize);
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 4adf12422787..a4f2bd52e546 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -148,7 +148,7 @@ static int full_duplex[MAX_UNITS] = {0, };
148 * This SUCKS. 148 * This SUCKS.
149 * We need a much better method to determine if dma_addr_t is 64-bit. 149 * We need a much better method to determine if dma_addr_t is 64-bit.
150 */ 150 */
151#if (defined(__i386__) && defined(CONFIG_HIGHMEM64G)) || defined(__x86_64__) || defined (__ia64__) || defined(__alpha__) || defined(__mips64__) || (defined(__mips__) && defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) || (defined(__powerpc64__) || defined(CONFIG_PHYS_64BIT)) 151#if (defined(__i386__) && defined(CONFIG_HIGHMEM64G)) || defined(__x86_64__) || defined (__ia64__) || defined(__alpha__) || (defined(CONFIG_MIPS) && ((defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) || defined(CONFIG_64BIT))) || (defined(__powerpc64__) || defined(CONFIG_PHYS_64BIT))
152/* 64-bit dma_addr_t */ 152/* 64-bit dma_addr_t */
153#define ADDR_64BITS /* This chip uses 64 bit addresses. */ 153#define ADDR_64BITS /* This chip uses 64 bit addresses. */
154#define netdrv_addr_t __le64 154#define netdrv_addr_t __le64
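What the starfire #if chain above really wants to know is whether dma_addr_t is 64 bits wide; the growing architecture list is exactly the kludge the in-code comment complains about. Since a preprocessor conditional cannot evaluate sizeof, a direct check has to live in C; a hedged sketch of that check (newer kernels express the same fact as the config symbol CONFIG_ARCH_DMA_ADDR_T_64BIT):

	#include <linux/types.h>

	/* true when DMA addresses are 64-bit on this build */
	static inline bool demo_dma_addr_is_64bit(void)
	{
		return sizeof(dma_addr_t) == sizeof(u64);
	}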
diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
index 3ed2a67bd6d3..b409d7ec4ac1 100644
--- a/drivers/net/sundance.c
+++ b/drivers/net/sundance.c
@@ -1016,7 +1016,7 @@ static void init_ring(struct net_device *dev)
1016 1016
1017 /* Fill in the Rx buffers. Handle allocation failure gracefully. */ 1017 /* Fill in the Rx buffers. Handle allocation failure gracefully. */
1018 for (i = 0; i < RX_RING_SIZE; i++) { 1018 for (i = 0; i < RX_RING_SIZE; i++) {
1019 struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz); 1019 struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + 2);
1020 np->rx_skbuff[i] = skb; 1020 np->rx_skbuff[i] = skb;
1021 if (skb == NULL) 1021 if (skb == NULL)
1022 break; 1022 break;
@@ -1407,7 +1407,7 @@ static void refill_rx (struct net_device *dev)
1407 struct sk_buff *skb; 1407 struct sk_buff *skb;
1408 entry = np->dirty_rx % RX_RING_SIZE; 1408 entry = np->dirty_rx % RX_RING_SIZE;
1409 if (np->rx_skbuff[entry] == NULL) { 1409 if (np->rx_skbuff[entry] == NULL) {
1410 skb = dev_alloc_skb(np->rx_buf_sz); 1410 skb = dev_alloc_skb(np->rx_buf_sz + 2);
1411 np->rx_skbuff[entry] = skb; 1411 np->rx_skbuff[entry] = skb;
1412 if (skb == NULL) 1412 if (skb == NULL)
1413 break; /* Better luck next round. */ 1413 break; /* Better luck next round. */
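The two sundance hunks grow each receive buffer by two bytes. That is the usual alignment trick: the driver reserves two bytes at the head of every rx skb so the 14-byte Ethernet header leaves the IP header on a 4-byte boundary, and the allocation has to cover that reserve. A sketch of the pairing, with a hypothetical helper:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	static struct sk_buff *demo_alloc_rx(struct net_device *dev, int buf_sz)
	{
		/* the "+ 2" pays for the skb_reserve() below */
		struct sk_buff *skb = dev_alloc_skb(buf_sz + 2);

		if (!skb)
			return NULL;
		skb->dev = dev;
		skb_reserve(skb, 2);	/* 14 + 2 = 16: IP header 4-byte aligned */
		return skb;
	}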
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index 8b3dc1eb4015..296000bf5a25 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -324,7 +324,7 @@ static int bdx_fw_load(struct bdx_priv *priv)
324 ENTER; 324 ENTER;
325 master = READ_REG(priv, regINIT_SEMAPHORE); 325 master = READ_REG(priv, regINIT_SEMAPHORE);
326 if (!READ_REG(priv, regINIT_STATUS) && master) { 326 if (!READ_REG(priv, regINIT_STATUS) && master) {
327 rc = request_firmware(&fw, "tehuti/firmware.bin", &priv->pdev->dev); 327 rc = request_firmware(&fw, "tehuti/bdx.bin", &priv->pdev->dev);
328 if (rc) 328 if (rc)
329 goto out; 329 goto out;
330 bdx_tx_push_desc_safe(priv, (char *)fw->data, fw->size); 330 bdx_tx_push_desc_safe(priv, (char *)fw->data, fw->size);
@@ -2510,4 +2510,4 @@ module_exit(bdx_module_exit);
2510MODULE_LICENSE("GPL"); 2510MODULE_LICENSE("GPL");
2511MODULE_AUTHOR(DRIVER_AUTHOR); 2511MODULE_AUTHOR(DRIVER_AUTHOR);
2512MODULE_DESCRIPTION(BDX_DRV_DESC); 2512MODULE_DESCRIPTION(BDX_DRV_DESC);
2513MODULE_FIRMWARE("tehuti/firmware.bin"); 2513MODULE_FIRMWARE("tehuti/bdx.bin");
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 30ccbb6d097a..6f97b7bbcbf1 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -12658,7 +12658,7 @@ static void __devinit tg3_read_vpd(struct tg3 *tp)
12658 cnt = pci_read_vpd(tp->pdev, pos, 12658 cnt = pci_read_vpd(tp->pdev, pos,
12659 TG3_NVM_VPD_LEN - pos, 12659 TG3_NVM_VPD_LEN - pos,
12660 &vpd_data[pos]); 12660 &vpd_data[pos]);
12661 if (cnt == -ETIMEDOUT || -EINTR) 12661 if (cnt == -ETIMEDOUT || cnt == -EINTR)
12662 cnt = 0; 12662 cnt = 0;
12663 else if (cnt < 0) 12663 else if (cnt < 0)
12664 goto out_not_found; 12664 goto out_not_found;
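The tg3 one-liner fixes a precedence/truthiness classic: "cnt == -ETIMEDOUT || -EINTR" parses as "(cnt == -ETIMEDOUT) || (-EINTR)", and since -EINTR is a non-zero constant the condition is always true, silently zeroing every read count. A stand-alone demonstration (plain user-space C, with the Linux errno values inlined for the example):

	#include <stdio.h>

	#define ETIMEDOUT 110
	#define EINTR 4

	int main(void)
	{
		int cnt = 42;	/* neither error code */

		if (cnt == -ETIMEDOUT || -EINTR)	/* buggy: always true */
			puts("buggy branch taken for every cnt");
		if (cnt == -ETIMEDOUT || cnt == -EINTR)	/* fixed */
			puts("never printed for cnt == 42");
		return 0;
	}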
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 5b83c3f35f47..a3c46f6a15e7 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -1004,7 +1004,6 @@ typhoon_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1004 } 1004 }
1005 1005
1006 strcpy(info->driver, KBUILD_MODNAME); 1006 strcpy(info->driver, KBUILD_MODNAME);
1007 strcpy(info->version, UTS_RELEASE);
1008 strcpy(info->bus_info, pci_name(pci_dev)); 1007 strcpy(info->bus_info, pci_name(pci_dev));
1009} 1008}
1010 1009
diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index aea4645be7f6..6140b56cce53 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -1508,6 +1508,10 @@ static const struct usb_device_id products [] = {
1508 USB_DEVICE (0x0b95, 0x1780), 1508 USB_DEVICE (0x0b95, 0x1780),
1509 .driver_info = (unsigned long) &ax88178_info, 1509 .driver_info = (unsigned long) &ax88178_info,
1510}, { 1510}, {
1511 // Logitec LAN-GTJ/U2A
1512 USB_DEVICE (0x0789, 0x0160),
1513 .driver_info = (unsigned long) &ax88178_info,
1514}, {
1511 // Linksys USB200M Rev 2 1515 // Linksys USB200M Rev 2
1512 USB_DEVICE (0x13b1, 0x0018), 1516 USB_DEVICE (0x13b1, 0x0018),
1513 .driver_info = (unsigned long) &ax88772_info, 1517 .driver_info = (unsigned long) &ax88772_info,
diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
index a6281e3987b5..2b791392e788 100644
--- a/drivers/net/usb/mcs7830.c
+++ b/drivers/net/usb/mcs7830.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * MOSCHIP MCS7830 based USB 2.0 Ethernet Devices 2 * MOSCHIP MCS7830 based (7730/7830/7832) USB 2.0 Ethernet Devices
3 * 3 *
4 * based on usbnet.c, asix.c and the vendor provided mcs7830 driver 4 * based on usbnet.c, asix.c and the vendor provided mcs7830 driver
5 * 5 *
@@ -11,6 +11,9 @@
11 * 11 *
12 * Definitions gathered from MOSCHIP, Data Sheet_7830DA.pdf (thanks!). 12 * Definitions gathered from MOSCHIP, Data Sheet_7830DA.pdf (thanks!).
13 * 13 *
14 * 2010-12-19: add 7832 USB PID ("functionality same as MCS7830"),
15 * per active notification by manufacturer
16 *
14 * TODO: 17 * TODO:
15 * - support HIF_REG_CONFIG_SLEEPMODE/HIF_REG_CONFIG_TXENABLE (via autopm?) 18 * - support HIF_REG_CONFIG_SLEEPMODE/HIF_REG_CONFIG_TXENABLE (via autopm?)
16 * - implement ethtool_ops get_pauseparam/set_pauseparam 19 * - implement ethtool_ops get_pauseparam/set_pauseparam
@@ -60,6 +63,7 @@
60#define MCS7830_MAX_MCAST 64 63#define MCS7830_MAX_MCAST 64
61 64
62#define MCS7830_VENDOR_ID 0x9710 65#define MCS7830_VENDOR_ID 0x9710
66#define MCS7832_PRODUCT_ID 0x7832
63#define MCS7830_PRODUCT_ID 0x7830 67#define MCS7830_PRODUCT_ID 0x7830
64#define MCS7730_PRODUCT_ID 0x7730 68#define MCS7730_PRODUCT_ID 0x7730
65 69
@@ -351,7 +355,7 @@ static int mcs7830_set_autoneg(struct usbnet *dev, int ptrUserPhyMode)
351 if (!ret) 355 if (!ret)
352 ret = mcs7830_write_phy(dev, MII_BMCR, 356 ret = mcs7830_write_phy(dev, MII_BMCR,
353 BMCR_ANENABLE | BMCR_ANRESTART ); 357 BMCR_ANENABLE | BMCR_ANRESTART );
354 return ret < 0 ? : 0; 358 return ret;
355} 359}
356 360
357 361
@@ -626,7 +630,7 @@ static int mcs7830_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
626} 630}
627 631
628static const struct driver_info moschip_info = { 632static const struct driver_info moschip_info = {
629 .description = "MOSCHIP 7830/7730 usb-NET adapter", 633 .description = "MOSCHIP 7830/7832/7730 usb-NET adapter",
630 .bind = mcs7830_bind, 634 .bind = mcs7830_bind,
631 .rx_fixup = mcs7830_rx_fixup, 635 .rx_fixup = mcs7830_rx_fixup,
632 .flags = FLAG_ETHER, 636 .flags = FLAG_ETHER,
@@ -645,6 +649,10 @@ static const struct driver_info sitecom_info = {
645 649
646static const struct usb_device_id products[] = { 650static const struct usb_device_id products[] = {
647 { 651 {
652 USB_DEVICE(MCS7830_VENDOR_ID, MCS7832_PRODUCT_ID),
653 .driver_info = (unsigned long) &moschip_info,
654 },
655 {
648 USB_DEVICE(MCS7830_VENDOR_ID, MCS7830_PRODUCT_ID), 656 USB_DEVICE(MCS7830_VENDOR_ID, MCS7830_PRODUCT_ID),
649 .driver_info = (unsigned long) &moschip_info, 657 .driver_info = (unsigned long) &moschip_info,
650 }, 658 },
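The "return ret < 0 ? : 0" removed above trips over the GNU "x ?: y" extension: with the middle operand omitted it defaults to the condition itself, so a negative ret evaluates to the condition's value 1 and the real error code is lost. Returning ret directly preserves it. A stand-alone illustration (GNU C):

	#include <stdio.h>

	static int old_style(int ret)
	{
		return ret < 0 ? : 0;	/* -5: (-5 < 0) yields 1, so 1 is returned */
	}

	static int fixed(int ret)
	{
		return ret;		/* -5 stays -5 */
	}

	int main(void)
	{
		printf("old: %d, fixed: %d\n", old_style(-5), fixed(-5));
		return 0;	/* prints "old: 1, fixed: -5" */
	}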
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 0bbc0c323135..cc83fa71c3ff 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -166,7 +166,9 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
166 if (!(rcv->flags & IFF_UP)) 166 if (!(rcv->flags & IFF_UP))
167 goto tx_drop; 167 goto tx_drop;
168 168
169 if (dev->features & NETIF_F_NO_CSUM) 169 /* don't change ip_summed == CHECKSUM_PARTIAL, as that
 170 * will cause bad checksum on forwarded packets */
171 if (skb->ip_summed == CHECKSUM_NONE)
170 skb->ip_summed = rcv_priv->ip_summed; 172 skb->ip_summed = rcv_priv->ip_summed;
171 173
172 length = skb->len + ETH_HLEN; 174 length = skb->len + ETH_HLEN;
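The veth change narrows when a packet inherits the peer's checksum policy: only CHECKSUM_NONE frames may be relabelled, because CHECKSUM_PARTIAL means "checksum not computed yet" and overwriting it would ship frames whose checksum never gets filled in. The rule, as a sketch with a hypothetical helper:

	#include <linux/skbuff.h>

	static void demo_inherit_summed(struct sk_buff *skb, int peer_policy)
	{
		/* CHECKSUM_PARTIAL / CHECKSUM_UNNECESSARY stay untouched */
		if (skb->ip_summed == CHECKSUM_NONE)
			skb->ip_summed = peer_policy;
	}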
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 25a2722c8a98..1d9aed645723 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -891,7 +891,6 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local,
891 891
892 SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops); 892 SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops);
893 893
894 netif_stop_queue(dev);
895} 894}
896 895
897static int hostap_enable_hostapd(local_info_t *local, int rtnl_locked) 896static int hostap_enable_hostapd(local_info_t *local, int rtnl_locked)
diff --git a/drivers/net/wireless/iwlwifi/iwl-1000.c b/drivers/net/wireless/iwlwifi/iwl-1000.c
index db540910b110..0e027f787fbc 100644
--- a/drivers/net/wireless/iwlwifi/iwl-1000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-1000.c
@@ -315,6 +315,7 @@ struct iwl_cfg iwl100_bgn_cfg = {
315 .mod_params = &iwlagn_mod_params, 315 .mod_params = &iwlagn_mod_params,
316 .base_params = &iwl1000_base_params, 316 .base_params = &iwl1000_base_params,
317 .ht_params = &iwl1000_ht_params, 317 .ht_params = &iwl1000_ht_params,
318 .use_new_eeprom_reading = true,
318}; 319};
319 320
320struct iwl_cfg iwl100_bg_cfg = { 321struct iwl_cfg iwl100_bg_cfg = {
@@ -330,6 +331,7 @@ struct iwl_cfg iwl100_bg_cfg = {
330 .ops = &iwl1000_ops, 331 .ops = &iwl1000_ops,
331 .mod_params = &iwlagn_mod_params, 332 .mod_params = &iwlagn_mod_params,
332 .base_params = &iwl1000_base_params, 333 .base_params = &iwl1000_base_params,
334 .use_new_eeprom_reading = true,
333}; 335};
334 336
335MODULE_FIRMWARE(IWL1000_MODULE_FIRMWARE(IWL1000_UCODE_API_MAX)); 337MODULE_FIRMWARE(IWL1000_MODULE_FIRMWARE(IWL1000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/iwlwifi/iwl-6000.c b/drivers/net/wireless/iwlwifi/iwl-6000.c
index 11e6532fc573..0ceeaac85eda 100644
--- a/drivers/net/wireless/iwlwifi/iwl-6000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-6000.c
@@ -561,6 +561,7 @@ struct iwl_cfg iwl6000g2a_2agn_cfg = {
561 .ht_params = &iwl6000_ht_params, 561 .ht_params = &iwl6000_ht_params,
562 .need_dc_calib = true, 562 .need_dc_calib = true,
563 .need_temp_offset_calib = true, 563 .need_temp_offset_calib = true,
564 .use_new_eeprom_reading = true,
564}; 565};
565 566
566struct iwl_cfg iwl6000g2a_2abg_cfg = { 567struct iwl_cfg iwl6000g2a_2abg_cfg = {
@@ -578,6 +579,7 @@ struct iwl_cfg iwl6000g2a_2abg_cfg = {
578 .base_params = &iwl6000_base_params, 579 .base_params = &iwl6000_base_params,
579 .need_dc_calib = true, 580 .need_dc_calib = true,
580 .need_temp_offset_calib = true, 581 .need_temp_offset_calib = true,
582 .use_new_eeprom_reading = true,
581}; 583};
582 584
583struct iwl_cfg iwl6000g2a_2bg_cfg = { 585struct iwl_cfg iwl6000g2a_2bg_cfg = {
@@ -595,6 +597,7 @@ struct iwl_cfg iwl6000g2a_2bg_cfg = {
595 .base_params = &iwl6000_base_params, 597 .base_params = &iwl6000_base_params,
596 .need_dc_calib = true, 598 .need_dc_calib = true,
597 .need_temp_offset_calib = true, 599 .need_temp_offset_calib = true,
600 .use_new_eeprom_reading = true,
598}; 601};
599 602
600struct iwl_cfg iwl6000g2b_2agn_cfg = { 603struct iwl_cfg iwl6000g2b_2agn_cfg = {
@@ -616,6 +619,7 @@ struct iwl_cfg iwl6000g2b_2agn_cfg = {
616 .need_temp_offset_calib = true, 619 .need_temp_offset_calib = true,
617 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */ 620 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
618 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A, 621 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
622 .use_new_eeprom_reading = true,
619}; 623};
620 624
621struct iwl_cfg iwl6000g2b_2abg_cfg = { 625struct iwl_cfg iwl6000g2b_2abg_cfg = {
@@ -636,6 +640,7 @@ struct iwl_cfg iwl6000g2b_2abg_cfg = {
636 .need_temp_offset_calib = true, 640 .need_temp_offset_calib = true,
637 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */ 641 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
638 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A, 642 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
643 .use_new_eeprom_reading = true,
639}; 644};
640 645
641struct iwl_cfg iwl6000g2b_2bgn_cfg = { 646struct iwl_cfg iwl6000g2b_2bgn_cfg = {
@@ -657,6 +662,7 @@ struct iwl_cfg iwl6000g2b_2bgn_cfg = {
657 .need_temp_offset_calib = true, 662 .need_temp_offset_calib = true,
658 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */ 663 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
659 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A, 664 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
665 .use_new_eeprom_reading = true,
660}; 666};
661 667
662struct iwl_cfg iwl6000g2b_2bg_cfg = { 668struct iwl_cfg iwl6000g2b_2bg_cfg = {
@@ -677,6 +683,7 @@ struct iwl_cfg iwl6000g2b_2bg_cfg = {
677 .need_temp_offset_calib = true, 683 .need_temp_offset_calib = true,
678 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */ 684 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
679 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A, 685 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
686 .use_new_eeprom_reading = true,
680}; 687};
681 688
682struct iwl_cfg iwl6000g2b_bgn_cfg = { 689struct iwl_cfg iwl6000g2b_bgn_cfg = {
@@ -698,6 +705,7 @@ struct iwl_cfg iwl6000g2b_bgn_cfg = {
698 .need_temp_offset_calib = true, 705 .need_temp_offset_calib = true,
699 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */ 706 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
700 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A, 707 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
708 .use_new_eeprom_reading = true,
701}; 709};
702 710
703struct iwl_cfg iwl6000g2b_bg_cfg = { 711struct iwl_cfg iwl6000g2b_bg_cfg = {
@@ -718,6 +726,7 @@ struct iwl_cfg iwl6000g2b_bg_cfg = {
718 .need_temp_offset_calib = true, 726 .need_temp_offset_calib = true,
719 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */ 727 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
720 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A, 728 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
729 .use_new_eeprom_reading = true,
721}; 730};
722 731
723/* 732/*
@@ -804,6 +813,7 @@ struct iwl_cfg iwl6050g2_bgn_cfg = {
804 .base_params = &iwl6050_base_params, 813 .base_params = &iwl6050_base_params,
805 .ht_params = &iwl6000_ht_params, 814 .ht_params = &iwl6000_ht_params,
806 .need_dc_calib = true, 815 .need_dc_calib = true,
816 .use_new_eeprom_reading = true,
807}; 817};
808 818
809struct iwl_cfg iwl6050_2abg_cfg = { 819struct iwl_cfg iwl6050_2abg_cfg = {
@@ -857,6 +867,7 @@ struct iwl_cfg iwl130_bgn_cfg = {
857 .need_dc_calib = true, 867 .need_dc_calib = true,
858 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */ 868 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
859 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A, 869 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
870 .use_new_eeprom_reading = true,
860}; 871};
861 872
862struct iwl_cfg iwl130_bg_cfg = { 873struct iwl_cfg iwl130_bg_cfg = {
@@ -876,6 +887,7 @@ struct iwl_cfg iwl130_bg_cfg = {
876 .need_dc_calib = true, 887 .need_dc_calib = true,
877 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */ 888 /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
878 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A, 889 .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
890 .use_new_eeprom_reading = true,
879}; 891};
880 892
881MODULE_FIRMWARE(IWL6000_MODULE_FIRMWARE(IWL6000_UCODE_API_MAX)); 893MODULE_FIRMWARE(IWL6000_MODULE_FIRMWARE(IWL6000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-eeprom.c b/drivers/net/wireless/iwlwifi/iwl-agn-eeprom.c
index a650baba0809..9eeeda18748d 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-eeprom.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-eeprom.c
@@ -392,7 +392,7 @@ static s8 iwl_update_channel_txpower(struct iwl_priv *priv,
392/** 392/**
393 * iwlcore_eeprom_enhanced_txpower: process enhanced tx power info 393 * iwlcore_eeprom_enhanced_txpower: process enhanced tx power info
394 */ 394 */
395void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv) 395static void iwlcore_eeprom_enhanced_txpower_old(struct iwl_priv *priv)
396{ 396{
397 int eeprom_section_count = 0; 397 int eeprom_section_count = 0;
398 int section, element; 398 int section, element;
@@ -419,7 +419,8 @@ void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
419 * always check for valid entry before process 419 * always check for valid entry before process
420 * the information 420 * the information
421 */ 421 */
422 if (!enhanced_txpower->common || enhanced_txpower->reserved) 422 if (!(enhanced_txpower->flags || enhanced_txpower->channel) ||
423 enhanced_txpower->delta_20_in_40)
423 continue; 424 continue;
424 425
425 for (element = 0; element < eeprom_section_count; element++) { 426 for (element = 0; element < eeprom_section_count; element++) {
@@ -452,3 +453,86 @@ void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
452 } 453 }
453 } 454 }
454} 455}
456
457static void
458iwlcore_eeprom_enh_txp_read_element(struct iwl_priv *priv,
459 struct iwl_eeprom_enhanced_txpwr *txp,
460 s8 max_txpower_avg)
461{
462 int ch_idx;
463 bool is_ht40 = txp->flags & IWL_EEPROM_ENH_TXP_FL_40MHZ;
464 enum ieee80211_band band;
465
466 band = txp->flags & IWL_EEPROM_ENH_TXP_FL_BAND_52G ?
467 IEEE80211_BAND_5GHZ : IEEE80211_BAND_2GHZ;
468
469 for (ch_idx = 0; ch_idx < priv->channel_count; ch_idx++) {
470 struct iwl_channel_info *ch_info = &priv->channel_info[ch_idx];
471
472 /* update matching channel or from common data only */
473 if (txp->channel != 0 && ch_info->channel != txp->channel)
474 continue;
475
476 /* update matching band only */
477 if (band != ch_info->band)
478 continue;
479
480 if (ch_info->max_power_avg < max_txpower_avg && !is_ht40) {
481 ch_info->max_power_avg = max_txpower_avg;
482 ch_info->curr_txpow = max_txpower_avg;
483 ch_info->scan_power = max_txpower_avg;
484 }
485
486 if (is_ht40 && ch_info->ht40_max_power_avg < max_txpower_avg)
487 ch_info->ht40_max_power_avg = max_txpower_avg;
488 }
489}
490
491#define EEPROM_TXP_OFFS (0x00 | INDIRECT_ADDRESS | INDIRECT_TXP_LIMIT)
492#define EEPROM_TXP_ENTRY_LEN sizeof(struct iwl_eeprom_enhanced_txpwr)
493#define EEPROM_TXP_SZ_OFFS (0x00 | INDIRECT_ADDRESS | INDIRECT_TXP_LIMIT_SIZE)
494
495static void iwlcore_eeprom_enhanced_txpower_new(struct iwl_priv *priv)
496{
497 struct iwl_eeprom_enhanced_txpwr *txp_array, *txp;
498 int idx, entries;
499 __le16 *txp_len;
500 s8 max_txp_avg, max_txp_avg_halfdbm;
501
502 BUILD_BUG_ON(sizeof(struct iwl_eeprom_enhanced_txpwr) != 8);
503
504 /* the length is in 16-bit words, but we want entries */
505 txp_len = (__le16 *) iwlagn_eeprom_query_addr(priv, EEPROM_TXP_SZ_OFFS);
506 entries = le16_to_cpup(txp_len) * 2 / EEPROM_TXP_ENTRY_LEN;
507
508 txp_array = (void *) iwlagn_eeprom_query_addr(priv, EEPROM_TXP_OFFS);
509 for (idx = 0; idx < entries; idx++) {
510 txp = &txp_array[idx];
511
512 /* skip invalid entries */
513 if (!(txp->flags & IWL_EEPROM_ENH_TXP_FL_VALID))
514 continue;
515
516 max_txp_avg = iwl_get_max_txpower_avg(priv, txp_array, idx,
517 &max_txp_avg_halfdbm);
518
519 /*
520 * Update the user limit values to the highest
521 * power supported by any channel
522 */
523 if (max_txp_avg > priv->tx_power_user_lmt)
524 priv->tx_power_user_lmt = max_txp_avg;
525 if (max_txp_avg_halfdbm > priv->tx_power_lmt_in_half_dbm)
526 priv->tx_power_lmt_in_half_dbm = max_txp_avg_halfdbm;
527
528 iwlcore_eeprom_enh_txp_read_element(priv, txp, max_txp_avg);
529 }
530}
531
532void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
533{
534 if (priv->cfg->use_new_eeprom_reading)
535 iwlcore_eeprom_enhanced_txpower_new(priv);
536 else
537 iwlcore_eeprom_enhanced_txpower_old(priv);
538}
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-lib.c b/drivers/net/wireless/iwlwifi/iwl-agn-lib.c
index b555edd53354..554afb7d9670 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-lib.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-lib.c
@@ -569,6 +569,12 @@ static u32 eeprom_indirect_address(const struct iwl_priv *priv, u32 address)
569 case INDIRECT_REGULATORY: 569 case INDIRECT_REGULATORY:
570 offset = iwl_eeprom_query16(priv, EEPROM_LINK_REGULATORY); 570 offset = iwl_eeprom_query16(priv, EEPROM_LINK_REGULATORY);
571 break; 571 break;
572 case INDIRECT_TXP_LIMIT:
573 offset = iwl_eeprom_query16(priv, EEPROM_LINK_TXP_LIMIT);
574 break;
575 case INDIRECT_TXP_LIMIT_SIZE:
576 offset = iwl_eeprom_query16(priv, EEPROM_LINK_TXP_LIMIT_SIZE);
577 break;
572 case INDIRECT_CALIBRATION: 578 case INDIRECT_CALIBRATION:
573 offset = iwl_eeprom_query16(priv, EEPROM_LINK_CALIBRATION); 579 offset = iwl_eeprom_query16(priv, EEPROM_LINK_CALIBRATION);
574 break; 580 break;
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h
index 64527def059f..954ecc2c34c4 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.h
+++ b/drivers/net/wireless/iwlwifi/iwl-core.h
@@ -390,6 +390,7 @@ struct iwl_cfg {
390 const bool need_temp_offset_calib; /* if used set to true */ 390 const bool need_temp_offset_calib; /* if used set to true */
391 u8 scan_rx_antennas[IEEE80211_NUM_BANDS]; 391 u8 scan_rx_antennas[IEEE80211_NUM_BANDS];
392 u8 scan_tx_antennas[IEEE80211_NUM_BANDS]; 392 u8 scan_tx_antennas[IEEE80211_NUM_BANDS];
393 const bool use_new_eeprom_reading; /* temporary, remove later */
393}; 394};
394 395
395/*************************** 396/***************************
diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.h b/drivers/net/wireless/iwlwifi/iwl-eeprom.h
index d9b590625ae4..e3a279d2d0b6 100644
--- a/drivers/net/wireless/iwlwifi/iwl-eeprom.h
+++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.h
@@ -120,6 +120,17 @@ struct iwl_eeprom_channel {
120 s8 max_power_avg; /* max power (dBm) on this chnl, limit 31 */ 120 s8 max_power_avg; /* max power (dBm) on this chnl, limit 31 */
121} __packed; 121} __packed;
122 122
123enum iwl_eeprom_enhanced_txpwr_flags {
124 IWL_EEPROM_ENH_TXP_FL_VALID = BIT(0),
125 IWL_EEPROM_ENH_TXP_FL_BAND_52G = BIT(1),
126 IWL_EEPROM_ENH_TXP_FL_OFDM = BIT(2),
127 IWL_EEPROM_ENH_TXP_FL_40MHZ = BIT(3),
128 IWL_EEPROM_ENH_TXP_FL_HT_AP = BIT(4),
129 IWL_EEPROM_ENH_TXP_FL_RES1 = BIT(5),
130 IWL_EEPROM_ENH_TXP_FL_RES2 = BIT(6),
131 IWL_EEPROM_ENH_TXP_FL_COMMON_TYPE = BIT(7),
132};
133
123/** 134/**
124 * iwl_eeprom_enhanced_txpwr structure 135 * iwl_eeprom_enhanced_txpwr structure
125 * This structure presents the enhanced regulatory tx power limit layout 136 * This structure presents the enhanced regulatory tx power limit layout
@@ -127,21 +138,23 @@ struct iwl_eeprom_channel {
127 * Enhanced regulatory tx power portion of eeprom image can be broken down 138 * Enhanced regulatory tx power portion of eeprom image can be broken down
128 * into individual structures; each one is 8 bytes in size and contain the 139 * into individual structures; each one is 8 bytes in size and contain the
129 * following information 140 * following information
130 * @common: (desc + channel) not used by driver, should _NOT_ be "zero" 141 * @flags: entry flags
142 * @channel: channel number
131 * @chain_a_max_pwr: chain a max power in 1/2 dBm 143 * @chain_a_max_pwr: chain a max power in 1/2 dBm
132 * @chain_b_max_pwr: chain b max power in 1/2 dBm 144 * @chain_b_max_pwr: chain b max power in 1/2 dBm
133 * @chain_c_max_pwr: chain c max power in 1/2 dBm 145 * @chain_c_max_pwr: chain c max power in 1/2 dBm
134 * @reserved: not used, should be "zero" 146 * @delta_20_in_40: 20-in-40 deltas (hi/lo)
135 * @mimo2_max_pwr: mimo2 max power in 1/2 dBm 147 * @mimo2_max_pwr: mimo2 max power in 1/2 dBm
136 * @mimo3_max_pwr: mimo3 max power in 1/2 dBm 148 * @mimo3_max_pwr: mimo3 max power in 1/2 dBm
137 * 149 *
138 */ 150 */
139struct iwl_eeprom_enhanced_txpwr { 151struct iwl_eeprom_enhanced_txpwr {
140 __le16 common; 152 u8 flags;
153 u8 channel;
141 s8 chain_a_max; 154 s8 chain_a_max;
142 s8 chain_b_max; 155 s8 chain_b_max;
143 s8 chain_c_max; 156 s8 chain_c_max;
144 s8 reserved; 157 u8 delta_20_in_40;
145 s8 mimo2_max; 158 s8 mimo2_max;
146 s8 mimo3_max; 159 s8 mimo3_max;
147} __packed; 160} __packed;
@@ -186,6 +199,8 @@ struct iwl_eeprom_enhanced_txpwr {
186#define EEPROM_LINK_CALIBRATION (2*0x67) 199#define EEPROM_LINK_CALIBRATION (2*0x67)
187#define EEPROM_LINK_PROCESS_ADJST (2*0x68) 200#define EEPROM_LINK_PROCESS_ADJST (2*0x68)
188#define EEPROM_LINK_OTHERS (2*0x69) 201#define EEPROM_LINK_OTHERS (2*0x69)
202#define EEPROM_LINK_TXP_LIMIT (2*0x6a)
203#define EEPROM_LINK_TXP_LIMIT_SIZE (2*0x6b)
189 204
190/* agn regulatory - indirect access */ 205/* agn regulatory - indirect access */
191#define EEPROM_REG_BAND_1_CHANNELS ((0x08)\ 206#define EEPROM_REG_BAND_1_CHANNELS ((0x08)\
@@ -389,6 +404,8 @@ struct iwl_eeprom_calib_info {
389#define INDIRECT_CALIBRATION 0x00040000 404#define INDIRECT_CALIBRATION 0x00040000
390#define INDIRECT_PROCESS_ADJST 0x00050000 405#define INDIRECT_PROCESS_ADJST 0x00050000
391#define INDIRECT_OTHERS 0x00060000 406#define INDIRECT_OTHERS 0x00060000
407#define INDIRECT_TXP_LIMIT 0x00070000
408#define INDIRECT_TXP_LIMIT_SIZE 0x00080000
392#define INDIRECT_ADDRESS 0x00100000 409#define INDIRECT_ADDRESS 0x00100000
393 410
394/* General */ 411/* General */
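The reshaped iwl_eeprom_enhanced_txpwr must keep its on-EEPROM size: the new parser walks the section as an array of fixed 8-byte records, which is why iwlcore_eeprom_enhanced_txpower_new() opens with a BUILD_BUG_ON on the struct size. A sketch of that guard pattern with an illustrative struct:

	#include <linux/kernel.h>
	#include <linux/types.h>

	struct demo_txpwr {		/* mirrors the 8-byte layout above */
		u8 flags;
		u8 channel;
		s8 chain_a_max;
		s8 chain_b_max;
		s8 chain_c_max;
		u8 delta_20_in_40;
		s8 mimo2_max;
		s8 mimo3_max;
	} __packed;

	static inline void demo_layout_check(void)
	{
		/* fails the build if padding ever creeps in */
		BUILD_BUG_ON(sizeof(struct demo_txpwr) != 8);
	}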
diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index 373930afc26b..113f4f204657 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -619,7 +619,7 @@ static int lbs_ret_scan(struct lbs_private *priv, unsigned long dummy,
619 print_ssid(ssid_buf, ssid, ssid_len), 619 print_ssid(ssid_buf, ssid, ssid_len),
620 LBS_SCAN_RSSI_TO_MBM(rssi)/100); 620 LBS_SCAN_RSSI_TO_MBM(rssi)/100);
621 621
622 if (channel || 622 if (channel &&
623 !(channel->flags & IEEE80211_CHAN_DISABLED)) 623 !(channel->flags & IEEE80211_CHAN_DISABLED))
624 cfg80211_inform_bss(wiphy, channel, 624 cfg80211_inform_bss(wiphy, channel,
625 bssid, le64_to_cpu(*(__le64 *)tsfdesc), 625 bssid, le64_to_cpu(*(__le64 *)tsfdesc),
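The libertas operator swap turns a guaranteed NULL dereference into a guard: with "channel || !(channel->flags & ...)" the flags are read exactly when channel is NULL, the only case where the left side is false. A minimal stand-alone illustration:

	#include <stdio.h>

	struct chan { unsigned flags; };
	#define CHAN_DISABLED 0x1

	static int usable(struct chan *c)
	{
		return c && !(c->flags & CHAN_DISABLED);	/* fixed form */
	}

	int main(void)
	{
		struct chan c = { 0 };

		/* "c || !(c->flags & ...)" would dereference NULL here */
		printf("NULL: %d, &c: %d\n", usable(NULL), usable(&c));
		return 0;
	}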
diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index d5bc21e5a02c..2325e56a9b0b 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -43,6 +43,7 @@ MODULE_FIRMWARE("isl3887usb");
43 43
44static struct usb_device_id p54u_table[] __devinitdata = { 44static struct usb_device_id p54u_table[] __devinitdata = {
45 /* Version 1 devices (pci chip + net2280) */ 45 /* Version 1 devices (pci chip + net2280) */
46 {USB_DEVICE(0x0411, 0x0050)}, /* Buffalo WLI2-USB2-G54 */
46 {USB_DEVICE(0x045e, 0x00c2)}, /* Microsoft MN-710 */ 47 {USB_DEVICE(0x045e, 0x00c2)}, /* Microsoft MN-710 */
47 {USB_DEVICE(0x0506, 0x0a11)}, /* 3COM 3CRWE254G72 */ 48 {USB_DEVICE(0x0506, 0x0a11)}, /* 3COM 3CRWE254G72 */
48 {USB_DEVICE(0x06b9, 0x0120)}, /* Thomson SpeedTouch 120g */ 49 {USB_DEVICE(0x06b9, 0x0120)}, /* Thomson SpeedTouch 120g */
@@ -56,9 +57,13 @@ static struct usb_device_id p54u_table[] __devinitdata = {
56 {USB_DEVICE(0x0846, 0x4220)}, /* Netgear WG111 */ 57 {USB_DEVICE(0x0846, 0x4220)}, /* Netgear WG111 */
57 {USB_DEVICE(0x09aa, 0x1000)}, /* Spinnaker Proto board */ 58 {USB_DEVICE(0x09aa, 0x1000)}, /* Spinnaker Proto board */
58 {USB_DEVICE(0x0cde, 0x0006)}, /* Medion 40900, Roper Europe */ 59 {USB_DEVICE(0x0cde, 0x0006)}, /* Medion 40900, Roper Europe */
60 {USB_DEVICE(0x0db0, 0x6826)}, /* MSI UB54G (MS-6826) */
59 {USB_DEVICE(0x107b, 0x55f2)}, /* Gateway WGU-210 (Gemtek) */ 61 {USB_DEVICE(0x107b, 0x55f2)}, /* Gateway WGU-210 (Gemtek) */
60 {USB_DEVICE(0x124a, 0x4023)}, /* Shuttle PN15, Airvast WM168g, IOGear GWU513 */ 62 {USB_DEVICE(0x124a, 0x4023)}, /* Shuttle PN15, Airvast WM168g, IOGear GWU513 */
63 {USB_DEVICE(0x1435, 0x0210)}, /* Inventel UR054G */
64 {USB_DEVICE(0x15a9, 0x0002)}, /* Gemtek WUBI-100GW 802.11g */
61 {USB_DEVICE(0x1630, 0x0005)}, /* 2Wire 802.11g USB (v1) / Z-Com */ 65 {USB_DEVICE(0x1630, 0x0005)}, /* 2Wire 802.11g USB (v1) / Z-Com */
66 {USB_DEVICE(0x182d, 0x096b)}, /* Sitecom WL-107 */
62 {USB_DEVICE(0x1915, 0x2234)}, /* Linksys WUSB54G OEM */ 67 {USB_DEVICE(0x1915, 0x2234)}, /* Linksys WUSB54G OEM */
63 {USB_DEVICE(0x1915, 0x2235)}, /* Linksys WUSB54G Portable OEM */ 68 {USB_DEVICE(0x1915, 0x2235)}, /* Linksys WUSB54G Portable OEM */
64 {USB_DEVICE(0x2001, 0x3701)}, /* DLink DWL-G120 Spinnaker */ 69 {USB_DEVICE(0x2001, 0x3701)}, /* DLink DWL-G120 Spinnaker */
@@ -94,6 +99,7 @@ static struct usb_device_id p54u_table[] __devinitdata = {
94 {USB_DEVICE(0x1435, 0x0427)}, /* Inventel UR054G */ 99 {USB_DEVICE(0x1435, 0x0427)}, /* Inventel UR054G */
95 {USB_DEVICE(0x1668, 0x1050)}, /* Actiontec 802UIG-1 */ 100 {USB_DEVICE(0x1668, 0x1050)}, /* Actiontec 802UIG-1 */
96 {USB_DEVICE(0x2001, 0x3704)}, /* DLink DWL-G122 rev A2 */ 101 {USB_DEVICE(0x2001, 0x3704)}, /* DLink DWL-G122 rev A2 */
102 {USB_DEVICE(0x2001, 0x3705)}, /* D-Link DWL-G120 rev C1 */
97 {USB_DEVICE(0x413c, 0x5513)}, /* Dell WLA3310 USB Wireless Adapter */ 103 {USB_DEVICE(0x413c, 0x5513)}, /* Dell WLA3310 USB Wireless Adapter */
98 {USB_DEVICE(0x413c, 0x8102)}, /* Spinnaker DUT */ 104 {USB_DEVICE(0x413c, 0x8102)}, /* Spinnaker DUT */
99 {USB_DEVICE(0x413c, 0x8104)}, /* Cohiba Proto board */ 105 {USB_DEVICE(0x413c, 0x8104)}, /* Cohiba Proto board */
diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c
index b26739535986..09a67905c230 100644
--- a/drivers/net/wireless/rt2x00/rt2800pci.c
+++ b/drivers/net/wireless/rt2x00/rt2800pci.c
@@ -912,6 +912,7 @@ static int rt2800pci_probe_hw(struct rt2x00_dev *rt2x00dev)
912 __set_bit(DRIVER_REQUIRE_DMA, &rt2x00dev->flags); 912 __set_bit(DRIVER_REQUIRE_DMA, &rt2x00dev->flags);
913 __set_bit(DRIVER_REQUIRE_L2PAD, &rt2x00dev->flags); 913 __set_bit(DRIVER_REQUIRE_L2PAD, &rt2x00dev->flags);
914 __set_bit(DRIVER_REQUIRE_TXSTATUS_FIFO, &rt2x00dev->flags); 914 __set_bit(DRIVER_REQUIRE_TXSTATUS_FIFO, &rt2x00dev->flags);
915 __set_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags);
915 if (!modparam_nohwcrypt) 916 if (!modparam_nohwcrypt)
916 __set_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags); 917 __set_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags);
917 __set_bit(DRIVER_SUPPORT_LINK_TUNING, &rt2x00dev->flags); 918 __set_bit(DRIVER_SUPPORT_LINK_TUNING, &rt2x00dev->flags);
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 94fe589acfaa..ab43e7ca2a23 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -664,6 +664,7 @@ enum rt2x00_flags {
664 DRIVER_REQUIRE_COPY_IV, 664 DRIVER_REQUIRE_COPY_IV,
665 DRIVER_REQUIRE_L2PAD, 665 DRIVER_REQUIRE_L2PAD,
666 DRIVER_REQUIRE_TXSTATUS_FIFO, 666 DRIVER_REQUIRE_TXSTATUS_FIFO,
667 DRIVER_REQUIRE_TASKLET_CONTEXT,
667 668
668 /* 669 /*
669 * Driver features 670 * Driver features
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 5ba79b935f09..d019830ca840 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -390,9 +390,12 @@ void rt2x00lib_txdone(struct queue_entry *entry,
390 * through a mac80211 library call (RTS/CTS) then we should not 390 * through a mac80211 library call (RTS/CTS) then we should not
391 * send the status report back. 391 * send the status report back.
392 */ 392 */
393 if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) 393 if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) {
394 ieee80211_tx_status(rt2x00dev->hw, entry->skb); 394 if (test_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags))
395 else 395 ieee80211_tx_status(rt2x00dev->hw, entry->skb);
396 else
397 ieee80211_tx_status_ni(rt2x00dev->hw, entry->skb);
398 } else
396 dev_kfree_skb_any(entry->skb); 399 dev_kfree_skb_any(entry->skb);
397 400
398 /* 401 /*
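The rt2x00 change picks the completion call by execution context: ieee80211_tx_status() may only run with bottom halves already disabled (tasklet/IRQ paths), while ieee80211_tx_status_ni() is the process-context wrapper that disables them itself. A sketch of the split, with an illustrative flag:

	#include <net/mac80211.h>

	static void demo_report_txdone(struct ieee80211_hw *hw,
				       struct sk_buff *skb, bool in_tasklet)
	{
		if (in_tasklet)
			ieee80211_tx_status(hw, skb);	/* BHs already off */
		else
			ieee80211_tx_status_ni(hw, skb); /* wraps BH disable */
	}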
diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c
index cd1b3dcd61db..ec47e22fa186 100644
--- a/drivers/net/yellowfin.c
+++ b/drivers/net/yellowfin.c
@@ -744,7 +744,7 @@ static int yellowfin_init_ring(struct net_device *dev)
744 } 744 }
745 745
746 for (i = 0; i < RX_RING_SIZE; i++) { 746 for (i = 0; i < RX_RING_SIZE; i++) {
747 struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz); 747 struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz + 2);
748 yp->rx_skbuff[i] = skb; 748 yp->rx_skbuff[i] = skb;
749 if (skb == NULL) 749 if (skb == NULL)
750 break; 750 break;
@@ -1157,7 +1157,7 @@ static int yellowfin_rx(struct net_device *dev)
1157 for (; yp->cur_rx - yp->dirty_rx > 0; yp->dirty_rx++) { 1157 for (; yp->cur_rx - yp->dirty_rx > 0; yp->dirty_rx++) {
1158 entry = yp->dirty_rx % RX_RING_SIZE; 1158 entry = yp->dirty_rx % RX_RING_SIZE;
1159 if (yp->rx_skbuff[entry] == NULL) { 1159 if (yp->rx_skbuff[entry] == NULL) {
1160 struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz); 1160 struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz + 2);
1161 if (skb == NULL) 1161 if (skb == NULL)
1162 break; /* Better luck next round. */ 1162 break; /* Better luck next round. */
1163 yp->rx_skbuff[entry] = skb; 1163 yp->rx_skbuff[entry] = skb;
diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c
index c85d3c7421fc..f37fbeb66a44 100644
--- a/drivers/of/of_i2c.c
+++ b/drivers/of/of_i2c.c
@@ -61,7 +61,7 @@ void of_i2c_register_devices(struct i2c_adapter *adap)
61 info.of_node = of_node_get(node); 61 info.of_node = of_node_get(node);
62 info.archdata = &dev_ad; 62 info.archdata = &dev_ad;
63 63
64 request_module("%s", info.type); 64 request_module("%s%s", I2C_MODULE_PREFIX, info.type);
65 65
66 result = i2c_new_device(adap, &info); 66 result = i2c_new_device(adap, &info);
67 if (result == NULL) { 67 if (result == NULL) {
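The of_i2c fix matters because i2c drivers export their aliases with a bus prefix: MODULE_DEVICE_TABLE(i2c, ...) produces "i2c:<name>" aliases, so a bare request_module("%s", type) asks modprobe for a name nothing advertises. A sketch, with a hypothetical chip type:

	#include <linux/i2c.h>
	#include <linux/kmod.h>

	static void demo_load_i2c_driver(const char *type)
	{
		/* I2C_MODULE_PREFIX is "i2c:"; this loads e.g. "i2c:foo" */
		request_module("%s%s", I2C_MODULE_PREFIX, type);
	}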
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index 2574700db461..5f7226223a62 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -115,7 +115,8 @@ static struct pcie_port_service_driver __initdata dummy_driver = {
115static int __init select_detection_mode(void) 115static int __init select_detection_mode(void)
116{ 116{
117 struct dummy_slot *slot, *tmp; 117 struct dummy_slot *slot, *tmp;
118 pcie_port_service_register(&dummy_driver); 118 if (pcie_port_service_register(&dummy_driver))
119 return PCIEHP_DETECT_ACPI;
119 pcie_port_service_unregister(&dummy_driver); 120 pcie_port_service_unregister(&dummy_driver);
120 list_for_each_entry_safe(slot, tmp, &dummy_slots, list) { 121 list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
121 list_del(&slot->list); 122 list_del(&slot->list);
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index c44a5e8b8b82..f0b3ad13c273 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -75,6 +75,7 @@
75#include <drm/i915_drm.h> 75#include <drm/i915_drm.h>
76#include <asm/msr.h> 76#include <asm/msr.h>
77#include <asm/processor.h> 77#include <asm/processor.h>
78#include "intel_ips.h"
78 79
79#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32 80#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
80 81
@@ -245,6 +246,7 @@
245#define thm_writel(off, val) writel((val), ips->regmap + (off)) 246#define thm_writel(off, val) writel((val), ips->regmap + (off))
246 247
247static const int IPS_ADJUST_PERIOD = 5000; /* ms */ 248static const int IPS_ADJUST_PERIOD = 5000; /* ms */
249static bool late_i915_load = false;
248 250
249/* For initial average collection */ 251/* For initial average collection */
250static const int IPS_SAMPLE_PERIOD = 200; /* ms */ 252static const int IPS_SAMPLE_PERIOD = 200; /* ms */
@@ -339,6 +341,9 @@ struct ips_driver {
339 u64 orig_turbo_ratios; 341 u64 orig_turbo_ratios;
340}; 342};
341 343
344static bool
345ips_gpu_turbo_enabled(struct ips_driver *ips);
346
342/** 347/**
343 * ips_cpu_busy - is CPU busy? 348 * ips_cpu_busy - is CPU busy?
344 * @ips: IPS driver struct 349 * @ips: IPS driver struct
@@ -517,7 +522,7 @@ static void ips_disable_cpu_turbo(struct ips_driver *ips)
517 */ 522 */
518static bool ips_gpu_busy(struct ips_driver *ips) 523static bool ips_gpu_busy(struct ips_driver *ips)
519{ 524{
520 if (!ips->gpu_turbo_enabled) 525 if (!ips_gpu_turbo_enabled(ips))
521 return false; 526 return false;
522 527
523 return ips->gpu_busy(); 528 return ips->gpu_busy();
@@ -532,7 +537,7 @@ static bool ips_gpu_busy(struct ips_driver *ips)
532 */ 537 */
533static void ips_gpu_raise(struct ips_driver *ips) 538static void ips_gpu_raise(struct ips_driver *ips)
534{ 539{
535 if (!ips->gpu_turbo_enabled) 540 if (!ips_gpu_turbo_enabled(ips))
536 return; 541 return;
537 542
538 if (!ips->gpu_raise()) 543 if (!ips->gpu_raise())
@@ -549,7 +554,7 @@ static void ips_gpu_raise(struct ips_driver *ips)
549 */ 554 */
550static void ips_gpu_lower(struct ips_driver *ips) 555static void ips_gpu_lower(struct ips_driver *ips)
551{ 556{
552 if (!ips->gpu_turbo_enabled) 557 if (!ips_gpu_turbo_enabled(ips))
553 return; 558 return;
554 559
555 if (!ips->gpu_lower()) 560 if (!ips->gpu_lower())
@@ -1454,6 +1459,31 @@ out_err:
1454 return false; 1459 return false;
1455} 1460}
1456 1461
1462static bool
1463ips_gpu_turbo_enabled(struct ips_driver *ips)
1464{
1465 if (!ips->gpu_busy && late_i915_load) {
1466 if (ips_get_i915_syms(ips)) {
1467 dev_info(&ips->dev->dev,
1468 "i915 driver attached, reenabling gpu turbo\n");
1469 ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS);
1470 }
1471 }
1472
1473 return ips->gpu_turbo_enabled;
1474}
1475
1476void
1477ips_link_to_i915_driver()
1478{
1479 /* We can't cleanly get at the various ips_driver structs from
1480 * this caller (the i915 driver), so just set a flag saying
1481 * that it's time to try getting the symbols again.
1482 */
1483 late_i915_load = true;
1484}
1485EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
1486
1457static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = { 1487static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
1458 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 1488 { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
1459 PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), }, 1489 PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
diff --git a/drivers/platform/x86/intel_ips.h b/drivers/platform/x86/intel_ips.h
new file mode 100644
index 000000000000..73299beff5b3
--- /dev/null
+++ b/drivers/platform/x86/intel_ips.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (c) 2010 Intel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 * The full GNU General Public License is included in this distribution in
18 * the file called "COPYING".
19 */
20
21void ips_link_to_i915_driver(void);
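ips_link_to_i915_driver() implements a late-binding handshake: IPS cannot resolve i915's symbols if it probes first, so i915 calls this hook when it loads and IPS retries the symbol lookup on its next use. A heavily simplified sketch of the idea (all names hypothetical; the real driver resolves several symbols via symbol_get() and must symbol_put() them again on unload):

	#include <linux/errno.h>
	#include <linux/module.h>

	extern int provider_op_impl(void);	/* exported by the provider module */

	static bool provider_loaded;
	static int (*provider_op)(void);

	/* called by the provider module once it has finished loading */
	void demo_provider_arrived(void)
	{
		provider_loaded = true;		/* resolve lazily, not here */
	}
	EXPORT_SYMBOL_GPL(demo_provider_arrived);

	static int demo_call_provider(void)
	{
		if (!provider_op && provider_loaded)
			provider_op = symbol_get(provider_op_impl);
		return provider_op ? provider_op() : -ENODEV;
	}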
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index 41a9e34899ac..ca35b0ce944a 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -26,6 +26,7 @@
26#include <linux/sfi.h> 26#include <linux/sfi.h>
27#include <asm/mrst.h> 27#include <asm/mrst.h>
28#include <asm/intel_scu_ipc.h> 28#include <asm/intel_scu_ipc.h>
29#include <asm/mrst.h>
29 30
30/* IPC defines the following message types */ 31/* IPC defines the following message types */
31#define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */ 32#define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */
@@ -699,6 +700,9 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id)
699 iounmap(ipcdev.ipc_base); 700 iounmap(ipcdev.ipc_base);
700 return -ENOMEM; 701 return -ENOMEM;
701 } 702 }
703
704 intel_scu_devices_create();
705
702 return 0; 706 return 0;
703} 707}
704 708
@@ -720,6 +724,7 @@ static void ipc_remove(struct pci_dev *pdev)
720 iounmap(ipcdev.ipc_base); 724 iounmap(ipcdev.ipc_base);
721 iounmap(ipcdev.i2c_base); 725 iounmap(ipcdev.i2c_base);
722 ipcdev.pdev = NULL; 726 ipcdev.pdev = NULL;
727 intel_scu_devices_destroy();
723} 728}
724 729
725static const struct pci_device_id pci_ids[] = { 730static const struct pci_device_id pci_ids[] = {
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 2883428d5ac8..4941cade319f 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -463,6 +463,18 @@ config RTC_DRV_CMOS
463 This driver can also be built as a module. If so, the module 463 This driver can also be built as a module. If so, the module
464 will be called rtc-cmos. 464 will be called rtc-cmos.
465 465
466config RTC_DRV_VRTC
467 tristate "Virtual RTC for Moorestown platforms"
468 depends on X86_MRST
469 default y if X86_MRST
470
471 help
472 Say "yes" here to get direct support for the real time clock
473 found on Moorestown platforms. The VRTC is a emulated RTC that
474 derives its clock source from a real RTC in the PMIC. The MC146818
475 style programming interface is mostly conserved, but any
476 updates are done via IPC calls to the system controller FW.
477
466config RTC_DRV_DS1216 478config RTC_DRV_DS1216
467 tristate "Dallas DS1216" 479 tristate "Dallas DS1216"
468 depends on SNI_RM 480 depends on SNI_RM
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 4c2832df4697..2afdaf3ff986 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o
30obj-$(CONFIG_RTC_DRV_COH901331) += rtc-coh901331.o 30obj-$(CONFIG_RTC_DRV_COH901331) += rtc-coh901331.o
31obj-$(CONFIG_RTC_DRV_DAVINCI) += rtc-davinci.o 31obj-$(CONFIG_RTC_DRV_DAVINCI) += rtc-davinci.o
32obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o 32obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o
33obj-$(CONFIG_RTC_DRV_VRTC) += rtc-mrst.o
33obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o 34obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o
34obj-$(CONFIG_RTC_DRV_DS1286) += rtc-ds1286.o 35obj-$(CONFIG_RTC_DRV_DS1286) += rtc-ds1286.o
35obj-$(CONFIG_RTC_DRV_DS1302) += rtc-ds1302.o 36obj-$(CONFIG_RTC_DRV_DS1302) += rtc-ds1302.o
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
new file mode 100644
index 000000000000..bcd0cf63eb16
--- /dev/null
+++ b/drivers/rtc/rtc-mrst.c
@@ -0,0 +1,582 @@
1/*
2 * rtc-mrst.c: Driver for Moorestown virtual RTC
3 *
4 * (C) Copyright 2009 Intel Corporation
5 * Author: Jacob Pan (jacob.jun.pan@intel.com)
6 * Feng Tang (feng.tang@intel.com)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; version 2
11 * of the License.
12 *
13 * Note:
14 * VRTC is emulated by system controller firmware; the real HW
15 * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
16 * in a memory mapped IO space that is visible to the host IA
17 * processor.
18 *
19 * This driver is based upon drivers/rtc/rtc-cmos.c
20 */
21
22/*
23 * Note:
24 * * vRTC only supports binary mode and 24H mode
25 * * vRTC only supports PIE and AIE, no UIE, and its PIE only happens
26 * at 23:59:59 every day, with no support for adjustable frequency
27 * * Alarm function is also limited to hr/min/sec.
28 */
29
30#include <linux/mod_devicetable.h>
31#include <linux/platform_device.h>
32#include <linux/interrupt.h>
33#include <linux/spinlock.h>
34#include <linux/kernel.h>
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/sfi.h>
38
39#include <asm-generic/rtc.h>
40#include <asm/intel_scu_ipc.h>
41#include <asm/mrst.h>
42#include <asm/mrst-vrtc.h>
43
44struct mrst_rtc {
45 struct rtc_device *rtc;
46 struct device *dev;
47 int irq;
48 struct resource *iomem;
49
50 u8 enabled_wake;
51 u8 suspend_ctrl;
52};
53
54static const char driver_name[] = "rtc_mrst";
55
56#define RTC_IRQMASK (RTC_PF | RTC_AF)
57
58static inline int is_intr(u8 rtc_intr)
59{
60 if (!(rtc_intr & RTC_IRQF))
61 return 0;
62 return rtc_intr & RTC_IRQMASK;
63}
64
65/*
66 * rtc_time's year contains the increment over 1900, but vRTC's YEAR
67 * register can't be programmed to a value larger than 0x64, so the
68 * vRTC driver uses 1960 (1970 is the UNIX epoch) as the base and
69 * does the translation at read/write time.
70 *
71 * Why not just use 1970 as the offset? Because using 1960 keeps the
72 * leap-year setting consistent between the vRTC and the low-level
73 * physical rtc devices.
74 */
75static int mrst_read_time(struct device *dev, struct rtc_time *time)
76{
77 unsigned long flags;
78
79 if (rtc_is_updating())
80 mdelay(20);
81
82 spin_lock_irqsave(&rtc_lock, flags);
83 time->tm_sec = vrtc_cmos_read(RTC_SECONDS);
84 time->tm_min = vrtc_cmos_read(RTC_MINUTES);
85 time->tm_hour = vrtc_cmos_read(RTC_HOURS);
86 time->tm_mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
87 time->tm_mon = vrtc_cmos_read(RTC_MONTH);
88 time->tm_year = vrtc_cmos_read(RTC_YEAR);
89 spin_unlock_irqrestore(&rtc_lock, flags);
90
91 /* Adjust for the 1960/1900 */
92 time->tm_year += 60;
93 time->tm_mon--;
94 return 0;
95}
96
97static int mrst_set_time(struct device *dev, struct rtc_time *time)
98{
99 int ret;
100 unsigned long flags;
101 unsigned char mon, day, hrs, min, sec;
102 unsigned int yrs;
103
104 yrs = time->tm_year;
105 mon = time->tm_mon + 1; /* tm_mon starts at zero */
106 day = time->tm_mday;
107 hrs = time->tm_hour;
108 min = time->tm_min;
109 sec = time->tm_sec;
110
111 if (yrs < 70 || yrs > 138)
112 return -EINVAL;
113 yrs -= 60;
114
115 spin_lock_irqsave(&rtc_lock, flags);
116
117 vrtc_cmos_write(yrs, RTC_YEAR);
118 vrtc_cmos_write(mon, RTC_MONTH);
119 vrtc_cmos_write(day, RTC_DAY_OF_MONTH);
120 vrtc_cmos_write(hrs, RTC_HOURS);
121 vrtc_cmos_write(min, RTC_MINUTES);
122 vrtc_cmos_write(sec, RTC_SECONDS);
123
124 spin_unlock_irqrestore(&rtc_lock, flags);
125
126 ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETTIME);
127 return ret;
128}
129
130static int mrst_read_alarm(struct device *dev, struct rtc_wkalrm *t)
131{
132 struct mrst_rtc *mrst = dev_get_drvdata(dev);
133 unsigned char rtc_control;
134
135 if (mrst->irq <= 0)
136 return -EIO;
137
138 /* Basic alarms only support the hour, minute, and second fields.
139 * Some also support day and month, for alarms up to a year in
140 * the future.
141 */
142 t->time.tm_mday = -1;
143 t->time.tm_mon = -1;
144 t->time.tm_year = -1;
145
146 /* vRTC only supports binary mode */
147 spin_lock_irq(&rtc_lock);
148 t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM);
149 t->time.tm_min = vrtc_cmos_read(RTC_MINUTES_ALARM);
150 t->time.tm_hour = vrtc_cmos_read(RTC_HOURS_ALARM);
151
152 rtc_control = vrtc_cmos_read(RTC_CONTROL);
153 spin_unlock_irq(&rtc_lock);
154
155 t->enabled = !!(rtc_control & RTC_AIE);
156 t->pending = 0;
157
158 return 0;
159}
160
161static void mrst_checkintr(struct mrst_rtc *mrst, unsigned char rtc_control)
162{
163 unsigned char rtc_intr;
164
165 /*
166 * NOTE after changing RTC_xIE bits we always read INTR_FLAGS;
167 * allegedly some older rtcs need that to handle irqs properly
168 */
169 rtc_intr = vrtc_cmos_read(RTC_INTR_FLAGS);
170 rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
171 if (is_intr(rtc_intr))
172 rtc_update_irq(mrst->rtc, 1, rtc_intr);
173}
174
175static void mrst_irq_enable(struct mrst_rtc *mrst, unsigned char mask)
176{
177 unsigned char rtc_control;
178
179 /*
180 * Flush any pending IRQ status, notably for update irqs,
181 * before we enable new IRQs
182 */
183 rtc_control = vrtc_cmos_read(RTC_CONTROL);
184 mrst_checkintr(mrst, rtc_control);
185
186 rtc_control |= mask;
187 vrtc_cmos_write(rtc_control, RTC_CONTROL);
188
189 mrst_checkintr(mrst, rtc_control);
190}
191
192static void mrst_irq_disable(struct mrst_rtc *mrst, unsigned char mask)
193{
194 unsigned char rtc_control;
195
196 rtc_control = vrtc_cmos_read(RTC_CONTROL);
197 rtc_control &= ~mask;
198 vrtc_cmos_write(rtc_control, RTC_CONTROL);
199 mrst_checkintr(mrst, rtc_control);
200}
201
202static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t)
203{
204 struct mrst_rtc *mrst = dev_get_drvdata(dev);
205 unsigned char hrs, min, sec;
206 int ret = 0;
207
208 if (!mrst->irq)
209 return -EIO;
210
211 hrs = t->time.tm_hour;
212 min = t->time.tm_min;
213 sec = t->time.tm_sec;
214
215 spin_lock_irq(&rtc_lock);
216 /* Next rtc irq must not be from previous alarm setting */
217 mrst_irq_disable(mrst, RTC_AIE);
218
219 /* Update alarm */
220 vrtc_cmos_write(hrs, RTC_HOURS_ALARM);
221 vrtc_cmos_write(min, RTC_MINUTES_ALARM);
222 vrtc_cmos_write(sec, RTC_SECONDS_ALARM);
223
224 spin_unlock_irq(&rtc_lock);
225
226 ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETALARM);
227 if (ret)
228 return ret;
229
230 spin_lock_irq(&rtc_lock);
231 if (t->enabled)
232 mrst_irq_enable(mrst, RTC_AIE);
233
234 spin_unlock_irq(&rtc_lock);
235
236 return 0;
237}
238
239static int mrst_irq_set_state(struct device *dev, int enabled)
240{
241 struct mrst_rtc *mrst = dev_get_drvdata(dev);
242 unsigned long flags;
243
244 if (!mrst->irq)
245 return -ENXIO;
246
247 spin_lock_irqsave(&rtc_lock, flags);
248
249 if (enabled)
250 mrst_irq_enable(mrst, RTC_PIE);
251 else
252 mrst_irq_disable(mrst, RTC_PIE);
253
254 spin_unlock_irqrestore(&rtc_lock, flags);
255 return 0;
256}
257
258#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
259
260/* Currently, the vRTC doesn't support UIE ON/OFF */
261static int
262mrst_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
263{
264 struct mrst_rtc *mrst = dev_get_drvdata(dev);
265 unsigned long flags;
266
267 switch (cmd) {
268 case RTC_AIE_OFF:
269 case RTC_AIE_ON:
270 if (!mrst->irq)
271 return -EINVAL;
272 break;
273 default:
274 /* PIE ON/OFF is handled by mrst_irq_set_state() */
275 return -ENOIOCTLCMD;
276 }
277
278 spin_lock_irqsave(&rtc_lock, flags);
279 switch (cmd) {
280 case RTC_AIE_OFF: /* alarm off */
281 mrst_irq_disable(mrst, RTC_AIE);
282 break;
283 case RTC_AIE_ON: /* alarm on */
284 mrst_irq_enable(mrst, RTC_AIE);
285 break;
286 }
287 spin_unlock_irqrestore(&rtc_lock, flags);
288 return 0;
289}
290
291#else
292#define mrst_rtc_ioctl NULL
293#endif
294
295#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
296
297static int mrst_procfs(struct device *dev, struct seq_file *seq)
298{
299 unsigned char rtc_control, valid;
300
301 spin_lock_irq(&rtc_lock);
302 rtc_control = vrtc_cmos_read(RTC_CONTROL);
303 valid = vrtc_cmos_read(RTC_VALID);
304 spin_unlock_irq(&rtc_lock);
305
306 return seq_printf(seq,
307 "periodic_IRQ\t: %s\n"
308 "alarm\t\t: %s\n"
309 "BCD\t\t: no\n"
310 "periodic_freq\t: daily (not adjustable)\n",
311 (rtc_control & RTC_PIE) ? "on" : "off",
312 (rtc_control & RTC_AIE) ? "on" : "off");
313}
314
315#else
316#define mrst_procfs NULL
317#endif
318
319static const struct rtc_class_ops mrst_rtc_ops = {
320 .ioctl = mrst_rtc_ioctl,
321 .read_time = mrst_read_time,
322 .set_time = mrst_set_time,
323 .read_alarm = mrst_read_alarm,
324 .set_alarm = mrst_set_alarm,
325 .proc = mrst_procfs,
326 .irq_set_state = mrst_irq_set_state,
327};
328
329static struct mrst_rtc mrst_rtc;
330
331/*
332 * When vRTC IRQ is captured by SCU FW, FW will clear the AIE bit in
333 * Reg B, so no need for this driver to clear it
334 */
335static irqreturn_t mrst_rtc_irq(int irq, void *p)
336{
337 u8 irqstat;
338
339 spin_lock(&rtc_lock);
340 /* This read will clear all IRQ flags inside Reg C */
341 irqstat = vrtc_cmos_read(RTC_INTR_FLAGS);
342 spin_unlock(&rtc_lock);
343
344 irqstat &= RTC_IRQMASK | RTC_IRQF;
345 if (is_intr(irqstat)) {
346 rtc_update_irq(p, 1, irqstat);
347 return IRQ_HANDLED;
348 }
349 return IRQ_NONE;
350}
351
352static int __init
353vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq)
354{
355 int retval = 0;
356 unsigned char rtc_control;
357
358 /* There can be only one ... */
359 if (mrst_rtc.dev)
360 return -EBUSY;
361
362 if (!iomem)
363 return -ENODEV;
364
365 iomem = request_mem_region(iomem->start,
366 iomem->end + 1 - iomem->start,
367 driver_name);
368 if (!iomem) {
369 dev_dbg(dev, "i/o mem already in use.\n");
370 return -EBUSY;
371 }
372
373 mrst_rtc.irq = rtc_irq;
+	mrst_rtc.iomem = iomem;
+
+	mrst_rtc.rtc = rtc_device_register(driver_name, dev,
+				&mrst_rtc_ops, THIS_MODULE);
+	if (IS_ERR(mrst_rtc.rtc)) {
+		retval = PTR_ERR(mrst_rtc.rtc);
+		goto cleanup0;
+	}
+
+	mrst_rtc.dev = dev;
+	dev_set_drvdata(dev, &mrst_rtc);
+	rename_region(iomem, dev_name(&mrst_rtc.rtc->dev));
+
+	spin_lock_irq(&rtc_lock);
+	mrst_irq_disable(&mrst_rtc, RTC_PIE | RTC_AIE);
+	rtc_control = vrtc_cmos_read(RTC_CONTROL);
+	spin_unlock_irq(&rtc_lock);
+
+	if (!(rtc_control & RTC_24H) || (rtc_control & (RTC_DM_BINARY)))
+		dev_dbg(dev, "TODO: support more than 24-hr BCD mode\n");
+
+	if (rtc_irq) {
+		retval = request_irq(rtc_irq, mrst_rtc_irq,
+				IRQF_DISABLED, dev_name(&mrst_rtc.rtc->dev),
+				mrst_rtc.rtc);
+		if (retval < 0) {
+			dev_dbg(dev, "IRQ %d is already in use, err %d\n",
+				rtc_irq, retval);
+			goto cleanup1;
+		}
+	}
+	dev_dbg(dev, "initialised\n");
+	return 0;
+
+cleanup1:
+	mrst_rtc.dev = NULL;
+	rtc_device_unregister(mrst_rtc.rtc);
+cleanup0:
+	release_region(iomem->start, iomem->end + 1 - iomem->start);
+	dev_err(dev, "rtc-mrst: unable to initialise\n");
+	return retval;
+}
+
+static void rtc_mrst_do_shutdown(void)
+{
+	spin_lock_irq(&rtc_lock);
+	mrst_irq_disable(&mrst_rtc, RTC_IRQMASK);
+	spin_unlock_irq(&rtc_lock);
+}
+
+static void __exit rtc_mrst_do_remove(struct device *dev)
+{
+	struct mrst_rtc *mrst = dev_get_drvdata(dev);
+	struct resource *iomem;
+
+	rtc_mrst_do_shutdown();
+
+	if (mrst->irq)
+		free_irq(mrst->irq, mrst->rtc);
+
+	rtc_device_unregister(mrst->rtc);
+	mrst->rtc = NULL;
+
+	iomem = mrst->iomem;
+	release_region(iomem->start, iomem->end + 1 - iomem->start);
+	mrst->iomem = NULL;
+
+	mrst->dev = NULL;
+	dev_set_drvdata(dev, NULL);
+}
+
+#ifdef CONFIG_PM
+static int mrst_suspend(struct device *dev, pm_message_t mesg)
+{
+	struct mrst_rtc *mrst = dev_get_drvdata(dev);
+	unsigned char tmp;
+
+	/* Only the alarm might be a wakeup event source */
+	spin_lock_irq(&rtc_lock);
+	mrst->suspend_ctrl = tmp = vrtc_cmos_read(RTC_CONTROL);
+	if (tmp & (RTC_PIE | RTC_AIE)) {
+		unsigned char mask;
+
+		if (device_may_wakeup(dev))
+			mask = RTC_IRQMASK & ~RTC_AIE;
+		else
+			mask = RTC_IRQMASK;
+		tmp &= ~mask;
+		vrtc_cmos_write(tmp, RTC_CONTROL);
+
+		mrst_checkintr(mrst, tmp);
+	}
+	spin_unlock_irq(&rtc_lock);
+
+	if (tmp & RTC_AIE) {
+		mrst->enabled_wake = 1;
+		enable_irq_wake(mrst->irq);
+	}
+
+	dev_dbg(&mrst_rtc.rtc->dev, "suspend%s, ctrl %02x\n",
+			(tmp & RTC_AIE) ? ", alarm may wake" : "",
+			tmp);
+
+	return 0;
+}
+
+/*
+ * We want RTC alarms to wake us from the deep power saving state
+ */
+static inline int mrst_poweroff(struct device *dev)
+{
+	return mrst_suspend(dev, PMSG_HIBERNATE);
+}
+
+static int mrst_resume(struct device *dev)
+{
+	struct mrst_rtc *mrst = dev_get_drvdata(dev);
+	unsigned char tmp = mrst->suspend_ctrl;
+
+	/* Re-enable any irqs previously active */
+	if (tmp & RTC_IRQMASK) {
+		unsigned char mask;
+
+		if (mrst->enabled_wake) {
+			disable_irq_wake(mrst->irq);
+			mrst->enabled_wake = 0;
+		}
+
+		spin_lock_irq(&rtc_lock);
+		do {
+			vrtc_cmos_write(tmp, RTC_CONTROL);
+
+			mask = vrtc_cmos_read(RTC_INTR_FLAGS);
+			mask &= (tmp & RTC_IRQMASK) | RTC_IRQF;
+			if (!is_intr(mask))
+				break;
+
+			rtc_update_irq(mrst->rtc, 1, mask);
+			tmp &= ~RTC_AIE;
+		} while (mask & RTC_AIE);
+		spin_unlock_irq(&rtc_lock);
+	}
+
+	dev_dbg(&mrst_rtc.rtc->dev, "resume, ctrl %02x\n", tmp);
+
+	return 0;
+}
+
+#else
+#define mrst_suspend	NULL
+#define mrst_resume	NULL
+
+static inline int mrst_poweroff(struct device *dev)
+{
+	return -ENOSYS;
+}
+
+#endif
+
+static int __init vrtc_mrst_platform_probe(struct platform_device *pdev)
+{
+	return vrtc_mrst_do_probe(&pdev->dev,
+			platform_get_resource(pdev, IORESOURCE_MEM, 0),
+			platform_get_irq(pdev, 0));
+}
+
+static int __exit vrtc_mrst_platform_remove(struct platform_device *pdev)
+{
+	rtc_mrst_do_remove(&pdev->dev);
+	return 0;
+}
+
+static void vrtc_mrst_platform_shutdown(struct platform_device *pdev)
+{
+	if (system_state == SYSTEM_POWER_OFF && !mrst_poweroff(&pdev->dev))
+		return;
+
+	rtc_mrst_do_shutdown();
+}
+
+MODULE_ALIAS("platform:vrtc_mrst");
+
+static struct platform_driver vrtc_mrst_platform_driver = {
+	.probe		= vrtc_mrst_platform_probe,
+	.remove		= __exit_p(vrtc_mrst_platform_remove),
+	.shutdown	= vrtc_mrst_platform_shutdown,
+	.driver = {
+		.name		= (char *) driver_name,
+		.suspend	= mrst_suspend,
+		.resume		= mrst_resume,
+	}
+};
+
+static int __init vrtc_mrst_init(void)
+{
+	return platform_driver_register(&vrtc_mrst_platform_driver);
+}
+
+static void __exit vrtc_mrst_exit(void)
+{
+	platform_driver_unregister(&vrtc_mrst_platform_driver);
+}
+
+module_init(vrtc_mrst_init);
+module_exit(vrtc_mrst_exit);
+
+MODULE_AUTHOR("Jacob Pan; Feng Tang");
+MODULE_DESCRIPTION("Driver for Moorestown virtual RTC");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 90cf0a6ff23e..dd14e202c2c8 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -207,7 +207,7 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 {
 	struct rs5c372	*rs5c = i2c_get_clientdata(client);
-	unsigned char	buf[8];
+	unsigned char	buf[7];
 	int		addr;
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d "
diff --git a/drivers/scsi/bfa/bfa_fcs.c b/drivers/scsi/bfa/bfa_fcs.c
index c94502dfac66..045d7e87b632 100644
--- a/drivers/scsi/bfa/bfa_fcs.c
+++ b/drivers/scsi/bfa/bfa_fcs.c
@@ -677,7 +677,7 @@ bfa_fcs_fabric_sm_isolated(struct bfa_fcs_fabric_s *fabric,
 	bfa_trc(fabric->fcs, event);
 	wwn2str(pwwn_ptr, fabric->bport.port_cfg.pwwn);
 
-	BFA_LOG(KERN_INFO, bfad, log_level,
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 		"Port is isolated due to VF_ID mismatch. "
 		"PWWN: %s Port VF_ID: %04x switch port VF_ID: %04x.",
 		pwwn_ptr, fabric->fcs->port_vfid,
@@ -1411,7 +1411,7 @@ bfa_fcs_fabric_set_fabric_name(struct bfa_fcs_fabric_s *fabric,
 		wwn2str(pwwn_ptr, bfa_fcs_lport_get_pwwn(&fabric->bport));
 		wwn2str(fwwn_ptr,
 			bfa_fcs_lport_get_fabric_name(&fabric->bport));
-		BFA_LOG(KERN_WARNING, bfad, log_level,
+		BFA_LOG(KERN_WARNING, bfad, bfa_log_level,
 			"Base port WWN = %s Fabric WWN = %s\n",
 			pwwn_ptr, fwwn_ptr);
 	}
diff --git a/drivers/scsi/bfa/bfa_fcs_fcpim.c b/drivers/scsi/bfa/bfa_fcs_fcpim.c
index 9662bcdeb41d..413b58eef93a 100644
--- a/drivers/scsi/bfa/bfa_fcs_fcpim.c
+++ b/drivers/scsi/bfa/bfa_fcs_fcpim.c
@@ -261,7 +261,7 @@ bfa_fcs_itnim_sm_hcb_online(struct bfa_fcs_itnim_s *itnim,
 		bfa_fcb_itnim_online(itnim->itnim_drv);
 		wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(itnim->rport->port));
 		wwn2str(rpwwn_buf, itnim->rport->pwwn);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Target (WWN = %s) is online for initiator (WWN = %s)\n",
 			rpwwn_buf, lpwwn_buf);
 		break;
@@ -301,11 +301,11 @@ bfa_fcs_itnim_sm_online(struct bfa_fcs_itnim_s *itnim,
 		wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(itnim->rport->port));
 		wwn2str(rpwwn_buf, itnim->rport->pwwn);
 		if (bfa_fcs_lport_is_online(itnim->rport->port) == BFA_TRUE)
-			BFA_LOG(KERN_ERR, bfad, log_level,
+			BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 				"Target (WWN = %s) connectivity lost for "
 				"initiator (WWN = %s)\n", rpwwn_buf, lpwwn_buf);
 		else
-			BFA_LOG(KERN_INFO, bfad, log_level,
+			BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 				"Target (WWN = %s) offlined by initiator (WWN = %s)\n",
 				rpwwn_buf, lpwwn_buf);
 		break;
diff --git a/drivers/scsi/bfa/bfa_fcs_lport.c b/drivers/scsi/bfa/bfa_fcs_lport.c
index 377cbfff6f2e..8d651309302b 100644
--- a/drivers/scsi/bfa/bfa_fcs_lport.c
+++ b/drivers/scsi/bfa/bfa_fcs_lport.c
@@ -491,7 +491,7 @@ bfa_fcs_lport_online_actions(struct bfa_fcs_lport_s *port)
 	__port_action[port->fabric->fab_type].online(port);
 
 	wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
-	BFA_LOG(KERN_INFO, bfad, log_level,
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 		"Logical port online: WWN = %s Role = %s\n",
 		lpwwn_buf, "Initiator");
 
@@ -512,11 +512,11 @@ bfa_fcs_lport_offline_actions(struct bfa_fcs_lport_s *port)
 
 	wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
 	if (bfa_fcs_fabric_is_online(port->fabric) == BFA_TRUE)
-		BFA_LOG(KERN_ERR, bfad, log_level,
+		BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 		"Logical port lost fabric connectivity: WWN = %s Role = %s\n",
 		lpwwn_buf, "Initiator");
 	else
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 		"Logical port taken offline: WWN = %s Role = %s\n",
 		lpwwn_buf, "Initiator");
 
@@ -573,7 +573,7 @@ bfa_fcs_lport_deleted(struct bfa_fcs_lport_s *port)
 	char lpwwn_buf[BFA_STRING_32];
 
 	wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
-	BFA_LOG(KERN_INFO, bfad, log_level,
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 		"Logical port deleted: WWN = %s Role = %s\n",
 		lpwwn_buf, "Initiator");
 
@@ -878,7 +878,7 @@ bfa_fcs_lport_init(struct bfa_fcs_lport_s *lport,
 					vport ? vport->vport_drv : NULL);
 
 	wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(lport));
-	BFA_LOG(KERN_INFO, bfad, log_level,
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 		"New logical port created: WWN = %s Role = %s\n",
 		lpwwn_buf, "Initiator");
 
diff --git a/drivers/scsi/bfa/bfa_fcs_rport.c b/drivers/scsi/bfa/bfa_fcs_rport.c
index 47f35c0ef29a..cf4a6e73e60d 100644
--- a/drivers/scsi/bfa/bfa_fcs_rport.c
+++ b/drivers/scsi/bfa/bfa_fcs_rport.c
@@ -2056,7 +2056,7 @@ bfa_fcs_rport_online_action(struct bfa_fcs_rport_s *rport)
 	wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
 	wwn2str(rpwwn_buf, rport->pwwn);
 	if (!BFA_FCS_PID_IS_WKA(rport->pid))
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 		"Remote port (WWN = %s) online for logical port (WWN = %s)\n",
 		rpwwn_buf, lpwwn_buf);
 }
@@ -2075,12 +2075,12 @@ bfa_fcs_rport_offline_action(struct bfa_fcs_rport_s *rport)
 	wwn2str(rpwwn_buf, rport->pwwn);
 	if (!BFA_FCS_PID_IS_WKA(rport->pid)) {
 		if (bfa_fcs_lport_is_online(rport->port) == BFA_TRUE)
-			BFA_LOG(KERN_ERR, bfad, log_level,
+			BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 				"Remote port (WWN = %s) connectivity lost for "
 				"logical port (WWN = %s)\n",
 				rpwwn_buf, lpwwn_buf);
 		else
-			BFA_LOG(KERN_INFO, bfad, log_level,
+			BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 				"Remote port (WWN = %s) offlined by "
 				"logical port (WWN = %s)\n",
 				rpwwn_buf, lpwwn_buf);
diff --git a/drivers/scsi/bfa/bfa_ioc.c b/drivers/scsi/bfa/bfa_ioc.c
index 54475b53a5ab..9f4aa391ea9d 100644
--- a/drivers/scsi/bfa/bfa_ioc.c
+++ b/drivers/scsi/bfa/bfa_ioc.c
@@ -402,7 +402,7 @@ bfa_ioc_sm_op_entry(struct bfa_ioc_s *ioc)
 
 	ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_OK);
 	bfa_ioc_hb_monitor(ioc);
-	BFA_LOG(KERN_INFO, bfad, log_level, "IOC enabled\n");
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level, "IOC enabled\n");
 }
 
 static void
@@ -444,7 +444,7 @@ bfa_ioc_sm_disabling_entry(struct bfa_ioc_s *ioc)
 {
 	struct bfad_s *bfad = (struct bfad_s *)ioc->bfa->bfad;
 	bfa_iocpf_disable(ioc);
-	BFA_LOG(KERN_INFO, bfad, log_level, "IOC disabled\n");
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level, "IOC disabled\n");
 }
 
 /*
@@ -565,7 +565,7 @@ bfa_ioc_sm_fail_entry(struct bfa_ioc_s *ioc)
 		notify->cbfn(notify->cbarg);
 	}
 
-	BFA_LOG(KERN_CRIT, bfad, log_level,
+	BFA_LOG(KERN_CRIT, bfad, bfa_log_level,
 		"Heart Beat of IOC has failed\n");
 }
 
@@ -1812,7 +1812,7 @@ bfa_ioc_pf_fwmismatch(struct bfa_ioc_s *ioc)
 	 * Provide enable completion callback.
 	 */
 	ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE);
-	BFA_LOG(KERN_WARNING, bfad, log_level,
+	BFA_LOG(KERN_WARNING, bfad, bfa_log_level,
 		"Running firmware version is incompatible "
 		"with the driver version\n");
 }
diff --git a/drivers/scsi/bfa/bfa_svc.c b/drivers/scsi/bfa/bfa_svc.c
index c768143f4805..37e16ac8f249 100644
--- a/drivers/scsi/bfa/bfa_svc.c
+++ b/drivers/scsi/bfa/bfa_svc.c
@@ -2138,7 +2138,7 @@ bfa_fcport_sm_enabling_qwait(struct bfa_fcport_s *fcport,
 		bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
 				BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
 		wwn2str(pwwn_buf, fcport->pwwn);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Base port disabled: WWN = %s\n", pwwn_buf);
 		break;
 
@@ -2198,7 +2198,7 @@ bfa_fcport_sm_enabling(struct bfa_fcport_s *fcport,
 		bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
 				BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
 		wwn2str(pwwn_buf, fcport->pwwn);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Base port disabled: WWN = %s\n", pwwn_buf);
 		break;
 
@@ -2251,7 +2251,7 @@ bfa_fcport_sm_linkdown(struct bfa_fcport_s *fcport,
 
 		bfa_fcport_scn(fcport, BFA_PORT_LINKUP, BFA_FALSE);
 		wwn2str(pwwn_buf, fcport->pwwn);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Base port online: WWN = %s\n", pwwn_buf);
 		break;
 
@@ -2277,7 +2277,7 @@ bfa_fcport_sm_linkdown(struct bfa_fcport_s *fcport,
 		bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
 				BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
 		wwn2str(pwwn_buf, fcport->pwwn);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Base port disabled: WWN = %s\n", pwwn_buf);
 		break;
 
@@ -2322,9 +2322,9 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
 		bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
 				BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
 		wwn2str(pwwn_buf, fcport->pwwn);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Base port offline: WWN = %s\n", pwwn_buf);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Base port disabled: WWN = %s\n", pwwn_buf);
 		break;
 
@@ -2336,10 +2336,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
 			BFA_PL_EID_PORT_ST_CHANGE, 0, "Port Linkdown");
 		wwn2str(pwwn_buf, fcport->pwwn);
 		if (BFA_PORT_IS_DISABLED(fcport->bfa))
-			BFA_LOG(KERN_INFO, bfad, log_level,
+			BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 				"Base port offline: WWN = %s\n", pwwn_buf);
 		else
-			BFA_LOG(KERN_ERR, bfad, log_level,
+			BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 				"Base port (WWN = %s) "
 				"lost fabric connectivity\n", pwwn_buf);
 		break;
@@ -2349,10 +2349,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
 		bfa_fcport_reset_linkinfo(fcport);
 		wwn2str(pwwn_buf, fcport->pwwn);
 		if (BFA_PORT_IS_DISABLED(fcport->bfa))
-			BFA_LOG(KERN_INFO, bfad, log_level,
+			BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 				"Base port offline: WWN = %s\n", pwwn_buf);
 		else
-			BFA_LOG(KERN_ERR, bfad, log_level,
+			BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 				"Base port (WWN = %s) "
 				"lost fabric connectivity\n", pwwn_buf);
 		break;
@@ -2363,10 +2363,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
 		bfa_fcport_scn(fcport, BFA_PORT_LINKDOWN, BFA_FALSE);
 		wwn2str(pwwn_buf, fcport->pwwn);
 		if (BFA_PORT_IS_DISABLED(fcport->bfa))
-			BFA_LOG(KERN_INFO, bfad, log_level,
+			BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 				"Base port offline: WWN = %s\n", pwwn_buf);
 		else
-			BFA_LOG(KERN_ERR, bfad, log_level,
+			BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 				"Base port (WWN = %s) "
 				"lost fabric connectivity\n", pwwn_buf);
 		break;
@@ -2497,7 +2497,7 @@ bfa_fcport_sm_disabling(struct bfa_fcport_s *fcport,
 		bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
 				BFA_PL_EID_PORT_ENABLE, 0, "Port Enable");
 		wwn2str(pwwn_buf, fcport->pwwn);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Base port enabled: WWN = %s\n", pwwn_buf);
 		break;
 
@@ -2551,7 +2551,7 @@ bfa_fcport_sm_disabled(struct bfa_fcport_s *fcport,
 		bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
 				BFA_PL_EID_PORT_ENABLE, 0, "Port Enable");
 		wwn2str(pwwn_buf, fcport->pwwn);
-		BFA_LOG(KERN_INFO, bfad, log_level,
+		BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 			"Base port enabled: WWN = %s\n", pwwn_buf);
 		break;
 
diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c
index 1f938974b848..6797720213b2 100644
--- a/drivers/scsi/bfa/bfad.c
+++ b/drivers/scsi/bfa/bfad.c
@@ -50,7 +50,7 @@ int reqq_size, rspq_size, num_sgpgs;
 int rport_del_timeout = BFA_FCS_RPORT_DEF_DEL_TIMEOUT;
 int bfa_lun_queue_depth = BFAD_LUN_QUEUE_DEPTH;
 int bfa_io_max_sge = BFAD_IO_MAX_SGE;
-int log_level = 3; /* WARNING log level */
+int bfa_log_level = 3; /* WARNING log level */
 int ioc_auto_recover = BFA_TRUE;
 int bfa_linkup_delay = -1;
 int fdmi_enable = BFA_TRUE;
@@ -108,8 +108,8 @@ module_param(bfa_lun_queue_depth, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(bfa_lun_queue_depth, "Lun queue depth, default=32, Range[>0]");
 module_param(bfa_io_max_sge, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(bfa_io_max_sge, "Max io scatter/gather elements, default=255");
-module_param(log_level, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(log_level, "Driver log level, default=3, "
+module_param(bfa_log_level, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(bfa_log_level, "Driver log level, default=3, "
 		"Range[Critical:1|Error:2|Warning:3|Info:4]");
 module_param(ioc_auto_recover, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ioc_auto_recover, "IOC auto recovery, default=1, "
@@ -1112,7 +1112,7 @@ bfad_start_ops(struct bfad_s *bfad) {
 	} else
 		bfad_os_rport_online_wait(bfad);
 
-	BFA_LOG(KERN_INFO, bfad, log_level, "bfa device claimed\n");
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level, "bfa device claimed\n");
 
 	return BFA_STATUS_OK;
 }
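
The rename from log_level to bfa_log_level is not cosmetic: a non-static global and its module
parameter both land in kernel-wide namespaces (the symbol table and /sys/module/<name>/parameters/),
so an unprefixed name like log_level invites collisions with other code. A minimal sketch of the
module_param() pattern, with a hypothetical demo_ prefix standing in for a driver prefix:

#include <linux/module.h>
#include <linux/moduleparam.h>

/* The variable name doubles as the parameter name visible under
 * /sys/module/<module>/parameters/, hence the driver-specific prefix. */
static int demo_log_level = 3;	/* hypothetical knob, default WARNING */
module_param(demo_log_level, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(demo_log_level,
		 "Demo log level, Range[Critical:1|Error:2|Warning:3|Info:4]");

MODULE_LICENSE("GPL");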
diff --git a/drivers/scsi/bfa/bfad_drv.h b/drivers/scsi/bfa/bfad_drv.h
index 97f9b6c0937e..d5ce2349ac59 100644
--- a/drivers/scsi/bfa/bfad_drv.h
+++ b/drivers/scsi/bfa/bfad_drv.h
@@ -337,7 +337,7 @@ extern int num_sgpgs;
 extern int rport_del_timeout;
 extern int bfa_lun_queue_depth;
 extern int bfa_io_max_sge;
-extern int log_level;
+extern int bfa_log_level;
 extern int ioc_auto_recover;
 extern int bfa_linkup_delay;
 extern int msix_disable_cb;
diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c
index 8ca967dee66d..fbad5e9b2402 100644
--- a/drivers/scsi/bfa/bfad_im.c
+++ b/drivers/scsi/bfa/bfad_im.c
@@ -225,7 +225,8 @@ bfad_im_abort_handler(struct scsi_cmnd *cmnd)
 	}
 
 	bfa_trc(bfad, hal_io->iotag);
-	BFA_LOG(KERN_INFO, bfad, log_level, "scsi%d: abort cmnd %p iotag %x\n",
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level,
+		"scsi%d: abort cmnd %p iotag %x\n",
 		im_port->shost->host_no, cmnd, hal_io->iotag);
 	(void) bfa_ioim_abort(hal_io);
 	spin_unlock_irqrestore(&bfad->bfad_lock, flags);
@@ -241,7 +242,7 @@ bfad_im_abort_handler(struct scsi_cmnd *cmnd)
 
 	cmnd->scsi_done(cmnd);
 	bfa_trc(bfad, hal_io->iotag);
-	BFA_LOG(KERN_INFO, bfad, log_level,
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 		"scsi%d: complete abort 0x%p iotag 0x%x\n",
 		im_port->shost->host_no, cmnd, hal_io->iotag);
 	return SUCCESS;
@@ -260,7 +261,7 @@ bfad_im_target_reset_send(struct bfad_s *bfad, struct scsi_cmnd *cmnd,
 
 	tskim = bfa_tskim_alloc(&bfad->bfa, (struct bfad_tskim_s *) cmnd);
 	if (!tskim) {
-		BFA_LOG(KERN_ERR, bfad, log_level,
+		BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 			"target reset, fail to allocate tskim\n");
 		rc = BFA_STATUS_FAILED;
 		goto out;
@@ -311,7 +312,7 @@ bfad_im_reset_lun_handler(struct scsi_cmnd *cmnd)
 
 	tskim = bfa_tskim_alloc(&bfad->bfa, (struct bfad_tskim_s *) cmnd);
 	if (!tskim) {
-		BFA_LOG(KERN_ERR, bfad, log_level,
+		BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 			"LUN reset, fail to allocate tskim");
 		spin_unlock_irqrestore(&bfad->bfad_lock, flags);
 		rc = FAILED;
@@ -336,7 +337,7 @@ bfad_im_reset_lun_handler(struct scsi_cmnd *cmnd)
 
 	task_status = cmnd->SCp.Status >> 1;
 	if (task_status != BFI_TSKIM_STS_OK) {
-		BFA_LOG(KERN_ERR, bfad, log_level,
+		BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 			"LUN reset failure, status: %d\n", task_status);
 		rc = FAILED;
 	}
@@ -380,7 +381,7 @@ bfad_im_reset_bus_handler(struct scsi_cmnd *cmnd)
 
 		task_status = cmnd->SCp.Status >> 1;
 		if (task_status != BFI_TSKIM_STS_OK) {
-			BFA_LOG(KERN_ERR, bfad, log_level,
+			BFA_LOG(KERN_ERR, bfad, bfa_log_level,
 				"target reset failure,"
 				" status: %d\n", task_status);
 			err_cnt++;
@@ -460,7 +461,7 @@ bfa_fcb_itnim_free(struct bfad_s *bfad, struct bfad_itnim_s *itnim_drv)
 	fcid = bfa_fcs_itnim_get_fcid(&itnim_drv->fcs_itnim);
 	wwn2str(wwpn_str, wwpn);
 	fcid2str(fcid_str, fcid);
-	BFA_LOG(KERN_INFO, bfad, log_level,
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 		"ITNIM FREE scsi%d: FCID: %s WWPN: %s\n",
 		port->im_port->shost->host_no,
 		fcid_str, wwpn_str);
@@ -589,7 +590,7 @@ void
 bfad_im_scsi_host_free(struct bfad_s *bfad, struct bfad_im_port_s *im_port)
 {
 	bfa_trc(bfad, bfad->inst_no);
-	BFA_LOG(KERN_INFO, bfad, log_level, "Free scsi%d\n",
+	BFA_LOG(KERN_INFO, bfad, bfa_log_level, "Free scsi%d\n",
 		im_port->shost->host_no);
 
 	fc_remove_host(im_port->shost);
@@ -1048,7 +1049,7 @@ bfad_im_itnim_work_handler(struct work_struct *work)
 				fcid2str(fcid_str, fcid);
 				list_add_tail(&itnim->list_entry,
 					&im_port->itnim_mapped_list);
-				BFA_LOG(KERN_INFO, bfad, log_level,
+				BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 					"ITNIM ONLINE Target: %d:0:%d "
 					"FCID: %s WWPN: %s\n",
 					im_port->shost->host_no,
@@ -1081,7 +1082,7 @@ bfad_im_itnim_work_handler(struct work_struct *work)
 				wwn2str(wwpn_str, wwpn);
 				fcid2str(fcid_str, fcid);
 				list_del(&itnim->list_entry);
-				BFA_LOG(KERN_INFO, bfad, log_level,
+				BFA_LOG(KERN_INFO, bfad, bfa_log_level,
 					"ITNIM OFFLINE Target: %d:0:%d "
 					"FCID: %s WWPN: %s\n",
 					im_port->shost->host_no,
diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c
index e5e9e6735f7d..9739431092d1 100644
--- a/drivers/sh/intc/core.c
+++ b/drivers/sh/intc/core.c
@@ -198,6 +198,7 @@ int __init register_intc_controller(struct intc_desc *desc)
 	list_add_tail(&d->list, &intc_list);
 
 	raw_spin_lock_init(&d->lock);
+	INIT_RADIX_TREE(&d->tree, GFP_ATOMIC);
 
 	d->index = nr_intc_controllers;
 
diff --git a/drivers/spi/coldfire_qspi.c b/drivers/spi/coldfire_qspi.c
index 052b3c7fa6a0..8856bcca9d29 100644
--- a/drivers/spi/coldfire_qspi.c
+++ b/drivers/spi/coldfire_qspi.c
@@ -317,7 +317,7 @@ static void mcfqspi_work(struct work_struct *work)
 		msg = container_of(mcfqspi->msgq.next, struct spi_message,
 			queue);
 
-		list_del_init(&mcfqspi->msgq);
+		list_del_init(&msg->queue);
 		spin_unlock_irqrestore(&mcfqspi->lock, flags);
 
 		spi = msg->spi;
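
The one-line fix above is easy to misread: list_del_init() must be applied to the dequeued
entry's own list_head (msg->queue), not to the list head mcfqspi->msgq, otherwise the head
itself is unlinked and the remaining queued messages are orphaned. A kernel-style sketch of
the dequeue pattern, with hypothetical demo_ names rather than the driver's own:

#include <linux/list.h>

struct demo_msg {
	int payload;
	struct list_head queue;	/* links this message into a work list */
};

static struct demo_msg *demo_dequeue(struct list_head *msgq)
{
	struct demo_msg *msg;

	if (list_empty(msgq))
		return NULL;

	msg = list_first_entry(msgq, struct demo_msg, queue);
	list_del_init(&msg->queue);	/* unlink the entry, keep the head intact */
	return msg;
}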
diff --git a/drivers/spi/mpc52xx_spi.c b/drivers/spi/mpc52xx_spi.c
index ec9f0b1bf864..84439f655601 100644
--- a/drivers/spi/mpc52xx_spi.c
+++ b/drivers/spi/mpc52xx_spi.c
@@ -563,7 +563,7 @@ static struct of_platform_driver mpc52xx_spi_of_driver = {
 		.of_match_table = mpc52xx_spi_match,
 	},
 	.probe = mpc52xx_spi_probe,
-	.remove = __exit_p(mpc52xx_spi_remove),
+	.remove = __devexit_p(mpc52xx_spi_remove),
 };
 
 static int __init mpc52xx_spi_init(void)
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index 2a651e61bfbf..951a160fc27f 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -1305,10 +1305,49 @@ static int __exit omap2_mcspi_remove(struct platform_device *pdev)
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:omap2_mcspi");
 
+#ifdef CONFIG_SUSPEND
+/*
+ * When the SPI controller wakes up from off-mode, CS is in the active state.
+ * If it was in the inactive state when the driver was suspended, force it
+ * back to the inactive state at wake-up.
+ */
+static int omap2_mcspi_resume(struct device *dev)
+{
+	struct spi_master	*master = dev_get_drvdata(dev);
+	struct omap2_mcspi	*mcspi = spi_master_get_devdata(master);
+	struct omap2_mcspi_cs	*cs;
+
+	omap2_mcspi_enable_clocks(mcspi);
+	list_for_each_entry(cs, &omap2_mcspi_ctx[master->bus_num - 1].cs,
+			    node) {
+		if ((cs->chconf0 & OMAP2_MCSPI_CHCONF_FORCE) == 0) {
+
+			/*
+			 * We need to toggle the CS state for the OMAP to
+			 * take this change into account.
+			 */
+			MOD_REG_BIT(cs->chconf0, OMAP2_MCSPI_CHCONF_FORCE, 1);
+			__raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
+			MOD_REG_BIT(cs->chconf0, OMAP2_MCSPI_CHCONF_FORCE, 0);
+			__raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
+		}
+	}
+	omap2_mcspi_disable_clocks(mcspi);
+	return 0;
+}
+#else
+#define omap2_mcspi_resume	NULL
+#endif
+
+static const struct dev_pm_ops omap2_mcspi_pm_ops = {
+	.resume = omap2_mcspi_resume,
+};
+
 static struct platform_driver omap2_mcspi_driver = {
 	.driver = {
 		.name =		"omap2_mcspi",
 		.owner =	THIS_MODULE,
+		.pm =		&omap2_mcspi_pm_ops
 	},
 	.remove =	__exit_p(omap2_mcspi_remove),
 };
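
The hunk above also shows the preferred wiring for PM callbacks: a const struct dev_pm_ops hung
off driver.pm, rather than the legacy suspend/resume members of struct device_driver that the
rtc-mrst driver earlier in this diff still uses. A minimal sketch with hypothetical demo_ names;
a NULL callback in dev_pm_ops is simply skipped by the PM core:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm.h>

static int demo_resume(struct device *dev)
{
	/* restore controller state that was lost across suspend */
	return 0;
}

static const struct dev_pm_ops demo_pm_ops = {
	.resume = demo_resume,	/* other phases left NULL, i.e. no-ops */
};

static struct platform_driver demo_driver = {
	.driver = {
		.name	= "demo",
		.owner	= THIS_MODULE,
		.pm	= &demo_pm_ops,
	},
};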
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 709c836607de..b02d0cbce890 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -584,8 +584,7 @@ void spi_unregister_master(struct spi_master *master)
 	list_del(&master->list);
 	mutex_unlock(&board_lock);
 
-	dummy = device_for_each_child(master->dev.parent, &master->dev,
-				      __unregister);
+	dummy = device_for_each_child(&master->dev, NULL, __unregister);
 	device_unregister(&master->dev);
 }
 EXPORT_SYMBOL_GPL(spi_unregister_master);
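
The spi_unregister_master() fix hinges on device_for_each_child()'s signature: the first
argument is the parent whose children are visited, and the second is an opaque cookie passed
through to the callback. The old call therefore walked the siblings of the master instead of
the SPI devices hanging off it. A sketch of the corrected usage with hypothetical demo_ helpers:

#include <linux/device.h>

static int demo_unregister_child(struct device *dev, void *data)
{
	device_unregister(dev);	/* dev is one child of the parent below */
	return 0;		/* non-zero would stop the iteration early */
}

static void demo_remove_children(struct device *parent)
{
	/* visit every child of `parent`; the cookie is unused here */
	device_for_each_child(parent, NULL, demo_unregister_child);
}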
diff --git a/drivers/spi/spi_fsl_espi.c b/drivers/spi/spi_fsl_espi.c
index e3b4f6451966..a99e2333b949 100644
--- a/drivers/spi/spi_fsl_espi.c
+++ b/drivers/spi/spi_fsl_espi.c
@@ -258,18 +258,18 @@ static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	return mpc8xxx_spi->count;
 }
 
-static void fsl_espi_addr2cmd(unsigned int addr, u8 *cmd)
+static inline void fsl_espi_addr2cmd(unsigned int addr, u8 *cmd)
 {
-	if (cmd[1] && cmd[2] && cmd[3]) {
+	if (cmd) {
 		cmd[1] = (u8)(addr >> 16);
 		cmd[2] = (u8)(addr >> 8);
 		cmd[3] = (u8)(addr >> 0);
 	}
 }
 
-static unsigned int fsl_espi_cmd2addr(u8 *cmd)
+static inline unsigned int fsl_espi_cmd2addr(u8 *cmd)
 {
-	if (cmd[1] && cmd[2] && cmd[3])
+	if (cmd)
 		return cmd[1] << 16 | cmd[2] << 8 | cmd[3] << 0;
 
 	return 0;
@@ -395,9 +395,11 @@ static void fsl_espi_rw_trans(struct spi_message *m,
 		}
 	}
 
-	addr = fsl_espi_cmd2addr(local_buf);
-	addr += pos;
-	fsl_espi_addr2cmd(addr, local_buf);
+	if (pos > 0) {
+		addr = fsl_espi_cmd2addr(local_buf);
+		addr += pos;
+		fsl_espi_addr2cmd(addr, local_buf);
+	}
 
 	espi_trans->n_tx = n_tx;
 	espi_trans->n_rx = trans_len;
@@ -507,16 +509,29 @@ void fsl_espi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
 
 	/* We need handle RX first */
 	if (events & SPIE_NE) {
-		u32 rx_data;
+		u32 rx_data, tmp;
+		u8 rx_data_8;
 
 		/* Spin until RX is done */
 		while (SPIE_RXCNT(events) < min(4, mspi->len)) {
 			cpu_relax();
 			events = mpc8xxx_spi_read_reg(&reg_base->event);
 		}
-		mspi->len -= 4;
 
-		rx_data = mpc8xxx_spi_read_reg(&reg_base->receive);
+		if (mspi->len >= 4) {
+			rx_data = mpc8xxx_spi_read_reg(&reg_base->receive);
+		} else {
+			tmp = mspi->len;
+			rx_data = 0;
+			while (tmp--) {
+				rx_data_8 = in_8((u8 *)&reg_base->receive);
+				rx_data |= (rx_data_8 << (tmp * 8));
+			}
+
+			rx_data <<= (4 - mspi->len) * 8;
+		}
+
+		mspi->len -= 4;
 
 		if (mspi->rx)
 			mspi->get_rx(rx_data, mspi);
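
The new short-transfer branch above drains a sub-word tail one byte at a time, packs it
MSB-first, and then left-aligns the result in the 32-bit word the generic RX path expects.
The shifting is compact enough to deserve a standalone model; the following plain-C program
(simulated FIFO array, not driver code) reproduces exactly that arithmetic:

#include <stdio.h>
#include <stdint.h>

static uint32_t pack_tail(const uint8_t *fifo, int len)	/* len is 1..3 */
{
	uint32_t rx_data = 0;
	int tmp = len;

	/* first byte read lands in the highest remaining byte lane */
	while (tmp--)
		rx_data |= (uint32_t)fifo[len - 1 - tmp] << (tmp * 8);

	rx_data <<= (4 - len) * 8;	/* left-align the partial word */
	return rx_data;
}

int main(void)
{
	const uint8_t fifo[3] = { 0xaa, 0xbb, 0xcc };

	printf("0x%08x\n", pack_tail(fifo, 3));	/* prints 0xaabbcc00 */
	return 0;
}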
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 8c3c057aa847..d0e9e0207539 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -435,12 +435,6 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio)
 	int ret = 0;
 	struct zram *zram = queue->queuedata;
 
-	if (unlikely(!zram->init_done)) {
-		set_bit(BIO_UPTODATE, &bio->bi_flags);
-		bio_endio(bio, 0);
-		return 0;
-	}
-
 	if (!valid_io_request(zram, bio)) {
 		zram_stat64_inc(zram, &zram->stats.invalid_io);
 		bio_io_error(bio);
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index 44447f54942f..99ac70e32556 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -2206,8 +2206,11 @@ static int uea_boot(struct uea_softc *sc)
 		goto err1;
 	}
 
-	sc->kthread = kthread_run(uea_kthread, sc, "ueagle-atm");
-	if (sc->kthread == ERR_PTR(-ENOMEM)) {
+	/* Create worker thread, but don't start it here. Start it after
+	 * all usbatm generic initialization is done.
+	 */
+	sc->kthread = kthread_create(uea_kthread, sc, "ueagle-atm");
+	if (IS_ERR(sc->kthread)) {
 		uea_err(INS_TO_USBDEV(sc), "failed to create thread\n");
 		goto err2;
 	}
@@ -2624,6 +2627,7 @@ static struct usbatm_driver uea_usbatm_driver = {
 static int uea_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
 	struct usb_device *usb = interface_to_usbdev(intf);
+	int ret;
 
 	uea_enters(usb);
 	uea_info(usb, "ADSL device founded vid (%#X) pid (%#X) Rev (%#X): %s\n",
@@ -2637,7 +2641,19 @@ static int uea_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	if (UEA_IS_PREFIRM(id))
 		return uea_load_firmware(usb, UEA_CHIP_VERSION(id));
 
-	return usbatm_usb_probe(intf, id, &uea_usbatm_driver);
+	ret = usbatm_usb_probe(intf, id, &uea_usbatm_driver);
+	if (ret == 0) {
+		struct usbatm_data *usbatm = usb_get_intfdata(intf);
+		struct uea_softc *sc = usbatm->driver_data;
+
+		/* Ensure carrier is initialized to off as early as possible */
+		UPDATE_ATM_SIGNAL(ATM_PHY_SIG_LOST);
+
+		/* Only start the worker thread when all init is done */
+		wake_up_process(sc->kthread);
+	}
+
+	return ret;
 }
 
 static void uea_disconnect(struct usb_interface *intf)
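
Two things change in the ueagle probe path above: thread creation is split from thread start,
and the error check uses IS_ERR() instead of comparing against the single value
ERR_PTR(-ENOMEM), since thread creation can fail with other error codes. A kernel-style sketch
of the create-then-wake pattern, with hypothetical demo_ names:

#include <linux/kthread.h>
#include <linux/err.h>

static int demo_thread_fn(void *data)
{
	/* work loop; real code would poll kthread_should_stop() */
	return 0;
}

static struct task_struct *demo_start_later(void *ctx)
{
	struct task_struct *task;

	/* created in a sleeping state, unlike kthread_run() */
	task = kthread_create(demo_thread_fn, ctx, "demo-worker");
	if (IS_ERR(task))
		return task;	/* caller checks with IS_ERR() */

	/* ... finish initializing state the thread depends on ... */

	wake_up_process(task);	/* now it is safe to let it run */
	return task;
}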
diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c
index a4f4546f0be0..397d15eb1ea8 100644
--- a/drivers/video/backlight/cr_bllcd.c
+++ b/drivers/video/backlight/cr_bllcd.c
@@ -242,6 +242,7 @@ static int cr_backlight_remove(struct platform_device *pdev)
 	backlight_device_unregister(crp->cr_backlight_device);
 	lcd_device_unregister(crp->cr_lcd_device);
 	pci_dev_put(lpc_dev);
+	kfree(crp);
 
 	return 0;
 }
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 0e6aa3d96a42..4ac1201ad6c2 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1458,7 +1458,7 @@ static bool apertures_overlap(struct aperture *gen, struct aperture *hw)
 	if (gen->base == hw->base)
 		return true;
 	/* is the generic aperture base inside the hw base->hw base+size */
-	if (gen->base > hw->base && gen->base <= hw->base + hw->size)
+	if (gen->base > hw->base && gen->base < hw->base + hw->size)
 		return true;
 	return false;
 }
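
The apertures_overlap() fix is a classic half-open interval bug: an aperture spans
[base, base + size), so a second base exactly equal to base + size is adjacent, not
overlapping, and the old `<=` wrongly treated adjacent framebuffer ranges as conflicting.
A self-contained model of the corrected predicate:

#include <assert.h>
#include <stdbool.h>

struct aperture { unsigned long base, size; };

static bool bases_overlap(const struct aperture *gen, const struct aperture *hw)
{
	if (gen->base == hw->base)
		return true;
	/* strictly inside (hw->base, hw->base + hw->size) */
	return gen->base > hw->base && gen->base < hw->base + hw->size;
}

int main(void)
{
	struct aperture hw  = { .base = 0x1000, .size = 0x1000 };
	struct aperture adj = { .base = 0x2000, .size = 0x1000 };
	struct aperture in  = { .base = 0x1800, .size = 0x1000 };

	assert(!bases_overlap(&adj, &hw));	/* adjacent: no overlap */
	assert(bases_overlap(&in, &hw));	/* inside: overlap */
	return 0;
}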
diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c
index 5c363d026f64..1ab2c2588675 100644
--- a/drivers/video/imxfb.c
+++ b/drivers/video/imxfb.c
@@ -53,11 +53,8 @@
 #define LCDC_SIZE	0x04
 #define SIZE_XMAX(x)	((((x) >> 4) & 0x3f) << 20)
 
-#ifdef CONFIG_ARCH_MX1
-#define SIZE_YMAX(y)	((y) & 0x1ff)
-#else
-#define SIZE_YMAX(y)	((y) & 0x3ff)
-#endif
+#define YMAX_MASK	(cpu_is_mx1() ? 0x1ff : 0x3ff)
+#define SIZE_YMAX(y)	((y) & YMAX_MASK)
 
 #define LCDC_VPW	0x08
 #define VPW_VPW(x)	((x) & 0x3ff)
@@ -623,7 +620,7 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
 	if (var->right_margin > 255)
 		printk(KERN_ERR "%s: invalid right_margin %d\n",
 			info->fix.id, var->right_margin);
-	if (var->yres < 1 || var->yres > 511)
+	if (var->yres < 1 || var->yres > YMAX_MASK)
 		printk(KERN_ERR "%s: invalid yres %d\n",
 			info->fix.id, var->yres);
 	if (var->vsync_len > 100)
diff --git a/drivers/video/sh_mobile_hdmi.c b/drivers/video/sh_mobile_hdmi.c
index d7df10315d8d..fcda0e970113 100644
--- a/drivers/video/sh_mobile_hdmi.c
+++ b/drivers/video/sh_mobile_hdmi.c
@@ -787,6 +787,9 @@ static int sh_hdmi_read_edid(struct sh_hdmi *hdmi)
 			found_rate_error = rate_error;
 	}
 
+	hdmi->var.width = hdmi->monspec.max_x * 10;
+	hdmi->var.height = hdmi->monspec.max_y * 10;
+
 	/*
 	 * TODO 1: if no ->info is present, postpone running the config until
 	 * after ->info first gets registered.
@@ -960,8 +963,12 @@ static bool sh_hdmi_must_reconfigure(struct sh_hdmi *hdmi)
 	dev_dbg(info->dev, "Old %ux%u, new %ux%u\n",
 		mode1.xres, mode1.yres, mode2.xres, mode2.yres);
 
-	if (fb_mode_is_equal(&mode1, &mode2))
+	if (fb_mode_is_equal(&mode1, &mode2)) {
+		/* It can be a different monitor with an equal video-mode */
+		old_var->width = new_var->width;
+		old_var->height = new_var->height;
 		return false;
+	}
 
 	dev_dbg(info->dev, "Switching %u -> %u lines\n",
 		mode1.yres, mode2.yres);
@@ -1057,8 +1064,11 @@ static void sh_hdmi_edid_work_fn(struct work_struct *work)
 			 * on, if we run a resume here, the logo disappears
 			 */
 			if (lock_fb_info(hdmi->info)) {
-				sh_hdmi_display_on(hdmi, hdmi->info);
-				unlock_fb_info(hdmi->info);
+				struct fb_info *info = hdmi->info;
+				info->var.width = hdmi->var.width;
+				info->var.height = hdmi->var.height;
+				sh_hdmi_display_on(hdmi, info);
+				unlock_fb_info(info);
 			}
 		} else {
 			/* New monitor or have to wake up */
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index b02d97a879d6..c05326b61235 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -54,8 +54,8 @@ static int lcdc_shared_regs[] = {
 };
 #define NR_SHARED_REGS ARRAY_SIZE(lcdc_shared_regs)
 
-#define DEFAULT_XRES 1280
-#define DEFAULT_YRES 1024
+#define MAX_XRES 1920
+#define MAX_YRES 1080
 
 static unsigned long lcdc_offs_mainlcd[NR_CH_REGS] = {
 	[LDDCKPAT1R] = 0x400,
@@ -914,22 +914,12 @@ static int sh_mobile_check_var(struct fb_var_screeninfo *var, struct fb_info *in
 {
 	struct sh_mobile_lcdc_chan *ch = info->par;
 
-	if (var->xres < 160 || var->xres > 1920 ||
-	    var->yres < 120 || var->yres > 1080 ||
-	    var->left_margin < 32 || var->left_margin > 320 ||
-	    var->right_margin < 12 || var->right_margin > 240 ||
-	    var->upper_margin < 12 || var->upper_margin > 120 ||
-	    var->lower_margin < 1 || var->lower_margin > 64 ||
-	    var->hsync_len < 32 || var->hsync_len > 240 ||
-	    var->vsync_len < 2 || var->vsync_len > 64 ||
-	    var->pixclock < 6000 || var->pixclock > 40000 ||
+	if (var->xres > MAX_XRES || var->yres > MAX_YRES ||
 	    var->xres * var->yres * (ch->cfg.bpp / 8) * 2 > info->fix.smem_len) {
-		dev_warn(info->dev, "Invalid info: %u %u %u %u %u %u %u %u %u!\n",
-			 var->xres, var->yres,
-			 var->left_margin, var->right_margin,
-			 var->upper_margin, var->lower_margin,
-			 var->hsync_len, var->vsync_len,
-			 var->pixclock);
+		dev_warn(info->dev, "Invalid info: %u-%u-%u-%u x %u-%u-%u-%u @ %ukHz!\n",
			 var->left_margin, var->xres, var->right_margin, var->hsync_len,
			 var->upper_margin, var->yres, var->lower_margin, var->vsync_len,
			 PICOS2KHZ(var->pixclock));
 		return -EINVAL;
 	}
 	return 0;
@@ -1226,7 +1216,7 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev)
 	}
 
 	if (!mode)
-		max_size = DEFAULT_XRES * DEFAULT_YRES;
+		max_size = MAX_XRES * MAX_YRES;
 	else if (max_cfg)
 		dev_dbg(&pdev->dev, "Found largest videomode %ux%u\n",
 			max_cfg->xres, max_cfg->yres);
@@ -1238,12 +1228,14 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev)
 			mode = &default_720p;
 			num_cfg = 1;
 		} else {
-			num_cfg = ch->cfg.num_cfg;
+			num_cfg = cfg->num_cfg;
 		}
 
 		fb_videomode_to_modelist(mode, num_cfg, &info->modelist);
 
 		fb_videomode_to_var(var, mode);
+		var->width = cfg->lcd_size_cfg.width;
+		var->height = cfg->lcd_size_cfg.height;
 		/* Default Y virtual resolution is 2x panel size */
 		var->yres_virtual = var->yres * 2;
 		var->activate = FB_ACTIVATE_NOW;
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 3d77116e4634..dea7b5bf6e2c 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -642,19 +642,14 @@ static struct notifier_block die_notifier = {
 */
 
 #ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 {
 	/*
 	 * If nmi_watchdog is turned off then we can turn on
 	 * our nmi decoding capability.
 	 */
-	if (!nmi_watchdog_active())
-		hpwdt_nmi_decoding = 1;
-	else
-		dev_warn(&dev->dev, "NMI decoding is disabled. To enable this "
-			"functionality you must reboot with nmi_watchdog=0 "
-			"and load the hpwdt driver with priority=1.\n");
+	hpwdt_nmi_decoding = 1;
 }
 #else
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
@@ -662,7 +657,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 	dev_warn(&dev->dev, "NMI decoding is disabled. "
 		"Your kernel does not support a NMI Watchdog.\n");
 }
-#endif /* ARCH_HAS_NMI_WATCHDOG */
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c
index 428f8a1583e8..3939e53f5f98 100644
--- a/drivers/watchdog/rdc321x_wdt.c
+++ b/drivers/watchdog/rdc321x_wdt.c
@@ -231,7 +231,7 @@ static int __devinit rdc321x_wdt_probe(struct platform_device *pdev)
 	struct resource *r;
 	struct rdc321x_wdt_pdata *pdata;
 
-	pdata = pdev->dev.platform_data;
+	pdata = platform_get_drvdata(pdev);
 	if (!pdata) {
 		dev_err(&pdev->dev, "no platform data supplied\n");
 		return -ENODEV;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index dc963929de65..981c8477adab 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -232,6 +232,8 @@ static int setup_new_group_blocks(struct super_block *sb,
 			    GFP_NOFS);
 	if (err)
 		goto exit_bh;
+	for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
+		ext4_set_bit(bit, bh->b_data);
 
 	ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
 		   input->block_bitmap - start);
@@ -247,6 +249,9 @@ static int setup_new_group_blocks(struct super_block *sb,
 	err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
 	if (err)
 		goto exit_bh;
+	for (i = 0, bit = input->inode_table - start;
+	     i < sbi->s_itb_per_group; i++, bit++)
+		ext4_set_bit(bit, bh->b_data);
 
 	if ((err = extend_or_restart_transaction(handle, 2, bh)))
 		goto exit_bh;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 5476c066d4ee..3c4039d5eef1 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	int metadata;
 	unsigned int revokes = 0;
 	int x;
-	int error;
+	int error = 0;
 
 	if (!*top)
 		sm->sm_first = 0;
@@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	if (metadata)
 		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
 
-	error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+	if (ip != GFS2_I(sdp->sd_rindex))
+		error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+	else if (!sdp->sd_rgrps)
+		error = gfs2_ri_update(ip);
+
 	if (error)
 		return error;
 
@@ -879,7 +883,8 @@ out_rg_gunlock:
 out_rlist:
 	gfs2_rlist_free(&rlist);
 out:
-	gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
+	if (ip != GFS2_I(sdp->sd_rindex))
+		gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
 	return error;
 }
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f92c17704169..08a8beb152e6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -541,21 +541,6 @@ out_locked:
541 spin_unlock(&gl->gl_spin); 541 spin_unlock(&gl->gl_spin);
542} 542}
543 543
544static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
545 unsigned int req_state,
546 unsigned int flags)
547{
548 int ret = LM_OUT_ERROR;
549
550 if (!sdp->sd_lockstruct.ls_ops->lm_lock)
551 return req_state == LM_ST_UNLOCKED ? 0 : req_state;
552
553 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
554 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
555 req_state, flags);
556 return ret;
557}
558
559/** 544/**
560 * do_xmote - Calls the DLM to change the state of a lock 545 * do_xmote - Calls the DLM to change the state of a lock
561 * @gl: The lock state 546 * @gl: The lock state
@@ -575,13 +560,14 @@ __acquires(&gl->gl_spin)
575 560
576 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | 561 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
577 LM_FLAG_PRIORITY); 562 LM_FLAG_PRIORITY);
578 BUG_ON(gl->gl_state == target); 563 GLOCK_BUG_ON(gl, gl->gl_state == target);
579 BUG_ON(gl->gl_state == gl->gl_target); 564 GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
580 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && 565 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
581 glops->go_inval) { 566 glops->go_inval) {
582 set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 567 set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
583 do_error(gl, 0); /* Fail queued try locks */ 568 do_error(gl, 0); /* Fail queued try locks */
584 } 569 }
570 gl->gl_req = target;
585 spin_unlock(&gl->gl_spin); 571 spin_unlock(&gl->gl_spin);
586 if (glops->go_xmote_th) 572 if (glops->go_xmote_th)
587 glops->go_xmote_th(gl); 573 glops->go_xmote_th(gl);
@@ -594,15 +580,17 @@ __acquires(&gl->gl_spin)
594 gl->gl_state == LM_ST_DEFERRED) && 580 gl->gl_state == LM_ST_DEFERRED) &&
595 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 581 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
596 lck_flags |= LM_FLAG_TRY_1CB; 582 lck_flags |= LM_FLAG_TRY_1CB;
597 ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
598 583
599 if (!(ret & LM_OUT_ASYNC)) { 584 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
600 finish_xmote(gl, ret); 585 /* lock_dlm */
586 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
587 GLOCK_BUG_ON(gl, ret);
588 } else { /* lock_nolock */
589 finish_xmote(gl, target);
601 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 590 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
602 gfs2_glock_put(gl); 591 gfs2_glock_put(gl);
603 } else {
604 GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
605 } 592 }
593
606 spin_lock(&gl->gl_spin); 594 spin_lock(&gl->gl_spin);
607} 595}
608 596
@@ -951,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
951 939
952void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 940void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
953{ 941{
942 struct va_format vaf;
954 va_list args; 943 va_list args;
955 944
956 va_start(args, fmt); 945 va_start(args, fmt);
946
957 if (seq) { 947 if (seq) {
958 struct gfs2_glock_iter *gi = seq->private; 948 struct gfs2_glock_iter *gi = seq->private;
959 vsprintf(gi->string, fmt, args); 949 vsprintf(gi->string, fmt, args);
960 seq_printf(seq, gi->string); 950 seq_printf(seq, gi->string);
961 } else { 951 } else {
962 printk(KERN_ERR " "); 952 vaf.fmt = fmt;
963 vprintk(fmt, args); 953 vaf.va = &args;
954
955 printk(KERN_ERR " %pV", &vaf);
964 } 956 }
957
965 va_end(args); 958 va_end(args);
966} 959}
967 960
@@ -1361,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl)
1361 * @gl: Pointer to the glock 1354 * @gl: Pointer to the glock
1362 * @ret: The return value from the dlm 1355 * @ret: The return value from the dlm
1363 * 1356 *
1357 * The gl_reply field is under the gl_spin lock so that it is ok
1358 * to use a bitfield shared with other glock state fields.
1364 */ 1359 */
1365 1360
1366void gfs2_glock_complete(struct gfs2_glock *gl, int ret) 1361void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1367{ 1362{
1368 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 1363 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
1369 1364
1365 spin_lock(&gl->gl_spin);
1370 gl->gl_reply = ret; 1366 gl->gl_reply = ret;
1371 1367
1372 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { 1368 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
1373 spin_lock(&gl->gl_spin);
1374 if (gfs2_should_freeze(gl)) { 1369 if (gfs2_should_freeze(gl)) {
1375 set_bit(GLF_FROZEN, &gl->gl_flags); 1370 set_bit(GLF_FROZEN, &gl->gl_flags);
1376 spin_unlock(&gl->gl_spin); 1371 spin_unlock(&gl->gl_spin);
1377 return; 1372 return;
1378 } 1373 }
1379 spin_unlock(&gl->gl_spin);
1380 } 1374 }
1375
1376 spin_unlock(&gl->gl_spin);
1381 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1377 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1378 smp_wmb();
1382 gfs2_glock_hold(gl); 1379 gfs2_glock_hold(gl);
1383 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1380 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1384 gfs2_glock_put(gl); 1381 gfs2_glock_put(gl);
@@ -1626,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
1626static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) 1623static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1627{ 1624{
1628 struct task_struct *gh_owner = NULL; 1625 struct task_struct *gh_owner = NULL;
1629 char buffer[KSYM_SYMBOL_LEN];
1630 char flags_buf[32]; 1626 char flags_buf[32];
1631 1627
1632 sprint_symbol(buffer, gh->gh_ip);
1633 if (gh->gh_owner_pid) 1628 if (gh->gh_owner_pid)
1634 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 1629 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1635 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", 1630 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
1636 state2str(gh->gh_state), 1631 state2str(gh->gh_state),
1637 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), 1632 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
1638 gh->gh_error, 1633 gh->gh_error,
1639 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, 1634 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1640 gh_owner ? gh_owner->comm : "(ended)", buffer); 1635 gh_owner ? gh_owner->comm : "(ended)",
1636 (void *)gh->gh_ip);
1641 return 0; 1637 return 0;
1642} 1638}
1643 1639
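The %pS conversion drops a KSYM_SYMBOL_LEN stack buffer from the dump path. Roughly, the two styles compare as follows (sketch; ip stands for the saved caller address):

char buf[KSYM_SYMBOL_LEN];

sprint_symbol(buf, ip);			/* old: symbolise into a buffer */
printk("caller: %s\n", buf);

printk("caller: %pS\n", (void *)ip);	/* new: printk symbolises it */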
@@ -1782,12 +1778,13 @@ int __init gfs2_glock_init(void)
1782 } 1778 }
1783#endif 1779#endif
1784 1780
1785 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | 1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1786 WQ_HIGHPRI | WQ_FREEZEABLE, 0); 1782 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1787 if (IS_ERR(glock_workqueue)) 1783 if (IS_ERR(glock_workqueue))
1788 return PTR_ERR(glock_workqueue); 1784 return PTR_ERR(glock_workqueue);
1789 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | 1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1790 WQ_FREEZEABLE, 0); 1786 WQ_MEM_RECLAIM | WQ_FREEZEABLE,
1787 0);
1791 if (IS_ERR(gfs2_delete_workqueue)) { 1788 if (IS_ERR(gfs2_delete_workqueue)) {
1792 destroy_workqueue(glock_workqueue); 1789 destroy_workqueue(glock_workqueue);
1793 return PTR_ERR(gfs2_delete_workqueue); 1790 return PTR_ERR(gfs2_delete_workqueue);
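WQ_RESCUER was renamed WQ_MEM_RECLAIM; the behaviour is the same (a rescuer thread guarantees the queue can make forward progress under memory pressure). Minimal usage sketch with a hypothetical queue name:

struct workqueue_struct *wq;

wq = alloc_workqueue("my_wq", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
if (!wq)			/* alloc_workqueue() returns NULL on failure */
	return -ENOMEM;

As an aside, alloc_workqueue() signals failure with NULL rather than an ERR_PTR, so the IS_ERR() checks in the hunk above would not appear to catch an allocation failure.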
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index db1c26d6d220..691851ceb615 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,11 +87,10 @@ enum {
87#define GL_ASYNC 0x00000040 87#define GL_ASYNC 0x00000040
88#define GL_EXACT 0x00000080 88#define GL_EXACT 0x00000080
89#define GL_SKIP 0x00000100 89#define GL_SKIP 0x00000100
90#define GL_ATIME 0x00000200
91#define GL_NOCACHE 0x00000400 90#define GL_NOCACHE 0x00000400
92 91
93/* 92/*
94 * lm_lock() and lm_async_cb return flags 93 * lm_async_cb return flags
95 * 94 *
96 * LM_OUT_ST_MASK 95 * LM_OUT_ST_MASK
97 * Masks the lower two bits of lock state in the returned value. 96 * Masks the lower two bits of lock state in the returned value.
@@ -99,15 +98,11 @@ enum {
99 * LM_OUT_CANCELED 98 * LM_OUT_CANCELED
100 * The lock request was canceled. 99 * The lock request was canceled.
101 * 100 *
102 * LM_OUT_ASYNC
103 * The result of the request will be returned in an LM_CB_ASYNC callback.
104 *
105 */ 101 */
106 102
107#define LM_OUT_ST_MASK 0x00000003 103#define LM_OUT_ST_MASK 0x00000003
108#define LM_OUT_CANCELED 0x00000008 104#define LM_OUT_CANCELED 0x00000008
109#define LM_OUT_ASYNC 0x00000080 105#define LM_OUT_ERROR 0x00000004
110#define LM_OUT_ERROR 0x00000100
111 106
112/* 107/*
113 * lm_recovery_done() messages 108 * lm_recovery_done() messages
@@ -124,25 +119,12 @@ struct lm_lockops {
124 void (*lm_unmount) (struct gfs2_sbd *sdp); 119 void (*lm_unmount) (struct gfs2_sbd *sdp);
125 void (*lm_withdraw) (struct gfs2_sbd *sdp); 120 void (*lm_withdraw) (struct gfs2_sbd *sdp);
126 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); 121 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
127 unsigned int (*lm_lock) (struct gfs2_glock *gl, 122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
128 unsigned int req_state, unsigned int flags); 123 unsigned int flags);
129 void (*lm_cancel) (struct gfs2_glock *gl); 124 void (*lm_cancel) (struct gfs2_glock *gl);
130 const match_table_t *lm_tokens; 125 const match_table_t *lm_tokens;
131}; 126};
132 127
133#define LM_FLAG_TRY 0x00000001
134#define LM_FLAG_TRY_1CB 0x00000002
135#define LM_FLAG_NOEXP 0x00000004
136#define LM_FLAG_ANY 0x00000008
137#define LM_FLAG_PRIORITY 0x00000010
138
139#define GL_ASYNC 0x00000040
140#define GL_EXACT 0x00000080
141#define GL_SKIP 0x00000100
142#define GL_NOCACHE 0x00000400
143
144#define GLR_TRYFAILED 13
145
146extern struct workqueue_struct *gfs2_delete_workqueue; 128extern struct workqueue_struct *gfs2_delete_workqueue;
147static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) 129static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
148{ 130{
@@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
212int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 194int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
213void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 195void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
214void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 196void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
197
198__attribute__ ((format(printf, 2, 3)))
215void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); 199void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
216 200
217/** 201/**
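The format attribute added ahead of gfs2_print_dbg() lets the compiler type-check callers: argument 2 is the format string and the variadic arguments start at argument 3. Sketch:

__attribute__((format(printf, 2, 3)))
void my_print(struct seq_file *seq, const char *fmt, ...);

my_print(seq, "%d\n", "oops");	/* now warns: %d given a char * */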
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 0d149dcc04e5..263561bf1a50 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl)
325 325
326 if (gl->gl_state != LM_ST_UNLOCKED && 326 if (gl->gl_state != LM_ST_UNLOCKED &&
327 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 327 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
328 flush_workqueue(gfs2_delete_workqueue);
329 gfs2_meta_syncfs(sdp); 328 gfs2_meta_syncfs(sdp);
330 gfs2_log_shutdown(sdp); 329 gfs2_log_shutdown(sdp);
331 } 330 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 764fbb49efc8..8d3d2b4a0a7d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,12 +207,14 @@ struct gfs2_glock {
207 207
208 spinlock_t gl_spin; 208 spinlock_t gl_spin;
209 209
210 unsigned int gl_state; 210 /* State fields protected by gl_spin */
211 unsigned int gl_target; 211 unsigned int gl_state:2, /* Current state */
212 unsigned int gl_reply; 212 gl_target:2, /* Target state */
213 gl_demote_state:2, /* State requested by remote node */
214 gl_req:2, /* State in last dlm request */
215 gl_reply:8; /* Last reply from the dlm */
216
213 unsigned int gl_hash; 217 unsigned int gl_hash;
214 unsigned int gl_req;
215 unsigned int gl_demote_state; /* state requested by remote node */
216 unsigned long gl_demote_time; /* time of first demote request */ 218 unsigned long gl_demote_time; /* time of first demote request */
217 struct list_head gl_holders; 219 struct list_head gl_holders;
218 220
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e1213f7f9217..14e682dbe8bf 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -916,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
916 if (error) 916 if (error)
917 return error; 917 return error;
918 918
919 if ((attr->ia_valid & ATTR_SIZE) &&
920 attr->ia_size != i_size_read(inode)) {
921 error = vmtruncate(inode, attr->ia_size);
922 if (error)
923 return error;
924 }
925
926 setattr_copy(inode, attr); 919 setattr_copy(inode, attr);
927 mark_inode_dirty(inode); 920 mark_inode_dirty(inode);
928
929 gfs2_assert_warn(GFS2_SB(inode), !error);
930 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 921 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
931 gfs2_dinode_out(ip, dibh->b_data); 922 gfs2_dinode_out(ip, dibh->b_data);
932 brelse(dibh); 923 brelse(dibh);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 1c09425b45fd..6e493aee28f8 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
146 return lkf; 146 return lkf;
147} 147}
148 148
149static unsigned int gdlm_lock(struct gfs2_glock *gl, 149static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
150 unsigned int req_state, unsigned int flags) 150 unsigned int flags)
151{ 151{
152 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 152 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
153 int error;
154 int req; 153 int req;
155 u32 lkf; 154 u32 lkf;
156 155
157 gl->gl_req = req_state;
158 req = make_mode(req_state); 156 req = make_mode(req_state);
159 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); 157 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
160 158
@@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
162 * Submit the actual lock request. 160 * Submit the actual lock request.
163 */ 161 */
164 162
165 error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, 163 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
166 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 164 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
167 if (error == -EAGAIN)
168 return 0;
169 if (error)
170 return LM_OUT_ERROR;
171 return LM_OUT_ASYNC;
172} 165}
173 166
174static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) 167static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
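gdlm_lock() no longer translates dlm_lock()'s status into LM_OUT_* codes: zero now simply means the result will arrive asynchronously through gdlm_ast(). The caller side (see the glock.c hunk at the top of this patch) reduces to roughly the following (names approximate the surrounding code):

ret = ops->lm_lock(gl, target, lck_flags);
if (ret) {
	/* immediate failure: no ast callback will follow */
	pr_err("lm_lock error %d\n", ret);
	gfs2_glock_put(gl);
}
/* on success, wait for gdlm_ast() -> gfs2_glock_complete() */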
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 12cbea7502c2..1db6b7343229 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1069,7 +1069,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1069{ 1069{
1070 struct gfs2_inode *ip = GFS2_I(inode); 1070 struct gfs2_inode *ip = GFS2_I(inode);
1071 struct gfs2_sbd *sdp = GFS2_SB(inode); 1071 struct gfs2_sbd *sdp = GFS2_SB(inode);
1072 struct buffer_head *dibh;
1073 u32 ouid, ogid, nuid, ngid; 1072 u32 ouid, ogid, nuid, ngid;
1074 int error; 1073 int error;
1075 1074
@@ -1100,25 +1099,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1100 if (error) 1099 if (error)
1101 goto out_gunlock_q; 1100 goto out_gunlock_q;
1102 1101
1103 error = gfs2_meta_inode_buffer(ip, &dibh); 1102 error = gfs2_setattr_simple(ip, attr);
1104 if (error) 1103 if (error)
1105 goto out_end_trans; 1104 goto out_end_trans;
1106 1105
1107 if ((attr->ia_valid & ATTR_SIZE) &&
1108 attr->ia_size != i_size_read(inode)) {
1109 int error;
1110
1111 error = vmtruncate(inode, attr->ia_size);
1112 gfs2_assert_warn(sdp, !error);
1113 }
1114
1115 setattr_copy(inode, attr);
1116 mark_inode_dirty(inode);
1117
1118 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1119 gfs2_dinode_out(ip, dibh->b_data);
1120 brelse(dibh);
1121
1122 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1106 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1123 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1107 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1124 gfs2_quota_change(ip, -blocks, ouid, ogid); 1108 gfs2_quota_change(ip, -blocks, ouid, ogid);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index f606baf9ba72..a689901963de 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -666,6 +666,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
666 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); 666 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
667 qd->qd_qb.qb_limit = qp->qu_limit; 667 qd->qd_qb.qb_limit = qp->qu_limit;
668 } 668 }
669 if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
670 qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
671 qd->qd_qb.qb_value = qp->qu_value;
672 }
669 } 673 }
670 674
671 /* Write the quota into the quota file on disk */ 675 /* Write the quota into the quota file on disk */
@@ -1509,7 +1513,7 @@ out:
1509} 1513}
1510 1514
1511/* GFS2 only supports a subset of the XFS fields */ 1515/* GFS2 only supports a subset of the XFS fields */
1512#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) 1516#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
1513 1517
1514static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, 1518static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1515 struct fs_disk_quota *fdq) 1519 struct fs_disk_quota *fdq)
@@ -1569,9 +1573,15 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1569 if ((fdq->d_fieldmask & FS_DQ_BSOFT) && 1573 if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
1570 ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) 1574 ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
1571 fdq->d_fieldmask ^= FS_DQ_BSOFT; 1575 fdq->d_fieldmask ^= FS_DQ_BSOFT;
1576
1572 if ((fdq->d_fieldmask & FS_DQ_BHARD) && 1577 if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
1573 ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) 1578 ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
1574 fdq->d_fieldmask ^= FS_DQ_BHARD; 1579 fdq->d_fieldmask ^= FS_DQ_BHARD;
1580
1581 if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
1582 ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
1583 fdq->d_fieldmask ^= FS_DQ_BCOUNT;
1584
1575 if (fdq->d_fieldmask == 0) 1585 if (fdq->d_fieldmask == 0)
1576 goto out_i; 1586 goto out_i;
1577 1587
@@ -1620,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = {
1620 .get_dqblk = gfs2_get_dqblk, 1630 .get_dqblk = gfs2_get_dqblk,
1621 .set_dqblk = gfs2_set_dqblk, 1631 .set_dqblk = gfs2_set_dqblk,
1622}; 1632};
1623
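The FS_DQ_BCOUNT handling follows the file's existing pattern: when a requested value already matches what is on disk, its bit is cleared from d_fieldmask so a fully redundant Q_XSETQLIM turns into a no-op. Generic sketch of the XOR idiom (shift and cur_value are stand-ins):

if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
    (fdq->d_bcount >> shift) == cur_value)
	fdq->d_fieldmask ^= FS_DQ_BCOUNT;	/* bit known set, so ^ clears it */

if (fdq->d_fieldmask == 0)
	return 0;				/* nothing left to change */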
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 33c8407b876f..7293ea27020c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
500 for (rgrps = 0;; rgrps++) { 500 for (rgrps = 0;; rgrps++) {
501 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 501 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
502 502
503 if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) 503 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
504 break; 504 break;
505 error = gfs2_internal_read(ip, &ra_state, buf, &pos, 505 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
506 sizeof(struct gfs2_rindex)); 506 sizeof(struct gfs2_rindex));
@@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
583 * Returns: 0 on successful update, error code otherwise 583 * Returns: 0 on successful update, error code otherwise
584 */ 584 */
585 585
586static int gfs2_ri_update(struct gfs2_inode *ip) 586int gfs2_ri_update(struct gfs2_inode *ip)
587{ 587{
588 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 588 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
589 struct inode *inode = &ip->i_inode; 589 struct inode *inode = &ip->i_inode;
@@ -614,46 +614,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
614} 614}
615 615
616/** 616/**
617 * gfs2_ri_update_special - Pull in a new resource index from the disk
618 *
619 * This is a special version that's safe to call from gfs2_inplace_reserve_i.
620 * In this case we know that we don't have any resource groups in memory yet.
621 *
622 * @ip: pointer to the rindex inode
623 *
624 * Returns: 0 on successful update, error code otherwise
625 */
626static int gfs2_ri_update_special(struct gfs2_inode *ip)
627{
628 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
629 struct inode *inode = &ip->i_inode;
630 struct file_ra_state ra_state;
631 struct gfs2_rgrpd *rgd;
632 unsigned int max_data = 0;
633 int error;
634
635 file_ra_state_init(&ra_state, inode->i_mapping);
636 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
637 /* Ignore partials */
638 if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
639 i_size_read(inode))
640 break;
641 error = read_rindex_entry(ip, &ra_state);
642 if (error) {
643 clear_rgrpdi(sdp);
644 return error;
645 }
646 }
647 list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
648 if (rgd->rd_data > max_data)
649 max_data = rgd->rd_data;
650 sdp->sd_max_rg_data = max_data;
651
652 sdp->sd_rindex_uptodate = 1;
653 return 0;
654}
655
656/**
657 * gfs2_rindex_hold - Grab a lock on the rindex 617 * gfs2_rindex_hold - Grab a lock on the rindex
658 * @sdp: The GFS2 superblock 618 * @sdp: The GFS2 superblock
659 * @ri_gh: the glock holder 619 * @ri_gh: the glock holder
@@ -1226,16 +1186,25 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
1226 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 1186 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1227 else if (!sdp->sd_rgrps) /* We may not have the rindex read 1187 else if (!sdp->sd_rgrps) /* We may not have the rindex read
1228 in, so: */ 1188 in, so: */
1229 error = gfs2_ri_update_special(ip); 1189 error = gfs2_ri_update(ip);
1230 if (error) 1190 if (error)
1231 return error; 1191 return error;
1232 } 1192 }
1233 1193
1194try_again:
1234 do { 1195 do {
1235 error = get_local_rgrp(ip, &last_unlinked); 1196 error = get_local_rgrp(ip, &last_unlinked);
1236 /* If there is no space, flushing the log may release some */ 1197 /* If there is no space, flushing the log may release some */
1237 if (error) 1198 if (error) {
1199 if (ip == GFS2_I(sdp->sd_rindex) &&
1200 !sdp->sd_rindex_uptodate) {
1201 error = gfs2_ri_update(ip);
1202 if (error)
1203 return error;
1204 goto try_again;
1205 }
1238 gfs2_log_flush(sdp, NULL); 1206 gfs2_log_flush(sdp, NULL);
1207 }
1239 } while (error && tries++ < 3); 1208 } while (error && tries++ < 3);
1240 1209
1241 if (error) { 1210 if (error) {
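The try_again loop handles a stale resource index: if allocation fails while the rindex is out of date, the index is refreshed once and the allocation retried before falling back to the log-flush retries. Shape of the pattern (helper names hypothetical):

try_again:
	error = try_alloc();
	if (error && index_is_stale()) {
		error = refresh_index();	/* marks the index up to date */
		if (error)
			return error;
		goto try_again;			/* bounded: index is now fresh */
	}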
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 0e35c0466f9a..50c2bb04369c 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
48 48
49extern void gfs2_inplace_release(struct gfs2_inode *ip); 49extern void gfs2_inplace_release(struct gfs2_inode *ip);
50 50
51extern int gfs2_ri_update(struct gfs2_inode *ip);
51extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
52extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
53 54
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 30b58f07c8a6..439b61c03262 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,10 +1296,8 @@ fail:
1296 1296
1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1298{ 1298{
1299 struct inode *inode = &ip->i_inode;
1300 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1301 struct gfs2_ea_location el; 1300 struct gfs2_ea_location el;
1302 struct buffer_head *dibh;
1303 int error; 1301 int error;
1304 1302
1305 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); 1303 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
@@ -1321,26 +1319,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1321 if (error) 1319 if (error)
1322 return error; 1320 return error;
1323 1321
1324 error = gfs2_meta_inode_buffer(ip, &dibh); 1322 error = gfs2_setattr_simple(ip, attr);
1325 if (error)
1326 goto out_trans_end;
1327
1328 if ((attr->ia_valid & ATTR_SIZE) &&
1329 attr->ia_size != i_size_read(inode)) {
1330 int error;
1331
1332 error = vmtruncate(inode, attr->ia_size);
1333 gfs2_assert_warn(GFS2_SB(inode), !error);
1334 }
1335
1336 setattr_copy(inode, attr);
1337 mark_inode_dirty(inode);
1338
1339 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1340 gfs2_dinode_out(ip, dibh->b_data);
1341 brelse(dibh);
1342
1343out_trans_end:
1344 gfs2_trans_end(sdp); 1323 gfs2_trans_end(sdp);
1345 return error; 1324 return error;
1346} 1325}
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index f46ee8b0e135..9da29706f91c 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
828 super->s_journal_seg[i] = segno; 828 super->s_journal_seg[i] = segno;
829 super->s_journal_ec[i] = ec; 829 super->s_journal_ec[i] = ec;
830 logfs_set_segment_reserved(sb, segno); 830 logfs_set_segment_reserved(sb, segno);
831 err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); 831 err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
832 BUG_ON(err); /* mempool should prevent this */ 832 BUG_ON(err); /* mempool should prevent this */
833 err = logfs_erase_segment(sb, segno, 1); 833 err = logfs_erase_segment(sb, segno, 1);
834 BUG_ON(err); /* FIXME: remount-ro would be nicer */ 834 BUG_ON(err); /* FIXME: remount-ro would be nicer */
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 6127baf0e188..ee99a9f5dfd3 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
1994 1994
1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */ 1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */
1996 err = logfs_write_buf(master_inode, page, 0); 1996 err = logfs_write_buf(master_inode, page, 0);
1997 if (err)
1998 move_page_to_inode(inode, page);
1999
1997 logfs_put_write_page(page); 2000 logfs_put_write_page(page);
1998 return err; 2001 return err;
1999} 2002}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962cb3b73..0d7c5540ad66 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
573 /* this io's submitter should not have unlocked this before we could */ 573 /* this io's submitter should not have unlocked this before we could */
574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
575 575
576 if (ocfs2_iocb_is_sem_locked(iocb)) {
577 up_read(&inode->i_alloc_sem);
578 ocfs2_iocb_clear_sem_locked(iocb);
579 }
580
576 ocfs2_iocb_clear_rw_locked(iocb); 581 ocfs2_iocb_clear_rw_locked(iocb);
577 582
578 level = ocfs2_iocb_rw_locked_level(iocb); 583 level = ocfs2_iocb_rw_locked_level(iocb);
579 if (!level)
580 up_read(&inode->i_alloc_sem);
581 ocfs2_rw_unlock(inode, level); 584 ocfs2_rw_unlock(inode, level);
582 585
583 if (is_async) 586 if (is_async)
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfda691a..eceb456037c1 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
68 else 68 else
69 clear_bit(1, (unsigned long *)&iocb->private); 69 clear_bit(1, (unsigned long *)&iocb->private);
70} 70}
71
72/*
 73 * Using a named enum to name the bit positions stored in iocb->private,
 74 * which is used for communication between
75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
76 */
77enum ocfs2_iocb_lock_bits {
78 OCFS2_IOCB_RW_LOCK = 0,
79 OCFS2_IOCB_RW_LOCK_LEVEL,
80 OCFS2_IOCB_SEM,
81 OCFS2_IOCB_NUM_LOCKS
82};
83
71#define ocfs2_iocb_clear_rw_locked(iocb) \ 84#define ocfs2_iocb_clear_rw_locked(iocb) \
72 clear_bit(0, (unsigned long *)&iocb->private) 85 clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
73#define ocfs2_iocb_rw_locked_level(iocb) \ 86#define ocfs2_iocb_rw_locked_level(iocb) \
74 test_bit(1, (unsigned long *)&iocb->private) 87 test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
88#define ocfs2_iocb_set_sem_locked(iocb) \
89 set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
90#define ocfs2_iocb_clear_sem_locked(iocb) \
91 clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
92#define ocfs2_iocb_is_sem_locked(iocb) \
93 test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
75#endif /* OCFS2_FILE_H */ 94#endif /* OCFS2_FILE_H */
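iocb->private doubles as a small bitmap here; the enum names the bit positions so the submit paths (ocfs2_file_aio_read/write) and the completion path (ocfs2_dio_end_io) stay in agreement. The new sem bit pairs up like this (sketch assembled from the hunks below):

/* submit side: */
down_read(&inode->i_alloc_sem);
ocfs2_iocb_set_sem_locked(iocb);

/* completion side: */
if (ocfs2_iocb_is_sem_locked(iocb)) {
	up_read(&inode->i_alloc_sem);
	ocfs2_iocb_clear_sem_locked(iocb);
}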
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index c7fba396392d..6c61771469af 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
113 define_mask(QUOTA), 113 define_mask(QUOTA),
114 define_mask(REFCOUNT), 114 define_mask(REFCOUNT),
115 define_mask(BASTS), 115 define_mask(BASTS),
116 define_mask(RESERVATIONS),
117 define_mask(CLUSTER),
116 define_mask(ERROR), 118 define_mask(ERROR),
117 define_mask(NOTICE), 119 define_mask(NOTICE),
118 define_mask(KTHREAD), 120 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
120}; 121};
121 122
122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 123static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index ea2ed9f56c94..34d6544357d9 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -81,7 +81,7 @@
81#include <linux/sched.h> 81#include <linux/sched.h>
82 82
83/* bits that are frequently given and infrequently matched in the low word */ 83/* bits that are frequently given and infrequently matched in the low word */
84/* NOTE: If you add a flag, you need to also update mlog.c! */ 84/* NOTE: If you add a flag, you need to also update masklog.c! */
85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */ 85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */
86#define ML_EXIT 0x0000000000000002ULL /* func call exit */ 86#define ML_EXIT 0x0000000000000002ULL /* func call exit */
87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */ 87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */
@@ -114,13 +114,14 @@
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ 115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ 116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
117#define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */ 117#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */
118#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */
119#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */
120
118/* bits that are infrequently given and frequently matched in the high word */ 121/* bits that are infrequently given and frequently matched in the high word */
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 122#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */ 123#define ML_NOTICE 0x2000000000000000ULL /* sent to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 124#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
123#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
124 125
125#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 126#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
126#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 127#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c49f6de0e7ab..d417b3f9b0c7 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2461 2461
2462 di->i_dx_root = cpu_to_le64(dr_blkno); 2462 di->i_dx_root = cpu_to_le64(dr_blkno);
2463 2463
2464 spin_lock(&OCFS2_I(dir)->ip_lock);
2464 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; 2465 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2465 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 2466 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2467 spin_unlock(&OCFS2_I(dir)->ip_lock);
2466 2468
2467 ocfs2_journal_dirty(handle, di_bh); 2469 ocfs2_journal_dirty(handle, di_bh);
2468 2470
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4466 goto out_commit; 4468 goto out_commit;
4467 } 4469 }
4468 4470
4471 spin_lock(&OCFS2_I(dir)->ip_lock);
4469 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; 4472 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
4470 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 4473 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
4474 spin_unlock(&OCFS2_I(dir)->ip_lock);
4471 di->i_dx_root = cpu_to_le64(0ULL); 4475 di->i_dx_root = cpu_to_le64(0ULL);
4472 4476
4473 ocfs2_journal_dirty(handle, di_bh); 4477 ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f564b0e5f80d..59f0f6bdfc62 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2346 */ 2346 */
2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, 2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2348 struct dlm_lock_resource *res, 2348 struct dlm_lock_resource *res,
2349 int *numlocks) 2349 int *numlocks,
2350 int *hasrefs)
2350{ 2351{
2351 int ret; 2352 int ret;
2352 int i; 2353 int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2356 2357
2357 assert_spin_locked(&res->spinlock); 2358 assert_spin_locked(&res->spinlock);
2358 2359
2360 *numlocks = 0;
2361 *hasrefs = 0;
2362
2359 ret = -EINVAL; 2363 ret = -EINVAL;
2360 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 2364 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
2361 mlog(0, "cannot migrate lockres with unknown owner!\n"); 2365 mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2386 } 2390 }
2387 2391
2388 *numlocks = count; 2392 *numlocks = count;
2389 mlog(0, "migrateable lockres having %d locks\n", *numlocks); 2393
2394 count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2395 if (count < O2NM_MAX_NODES)
2396 *hasrefs = 1;
2397
2398 mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
2399 res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
2390 2400
2391leave: 2401leave:
2392 return ret; 2402 return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2408 const char *name; 2418 const char *name;
2409 unsigned int namelen; 2419 unsigned int namelen;
2410 int mle_added = 0; 2420 int mle_added = 0;
2411 int numlocks; 2421 int numlocks, hasrefs;
2412 int wake = 0; 2422 int wake = 0;
2413 2423
2414 if (!dlm_grab(dlm)) 2424 if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2417 name = res->lockname.name; 2427 name = res->lockname.name;
2418 namelen = res->lockname.len; 2428 namelen = res->lockname.len;
2419 2429
2420 mlog(0, "migrating %.*s to %u\n", namelen, name, target); 2430 mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
2421 2431
2422 /* 2432 /*
2423 * ensure this lockres is a proper candidate for migration 2433 * ensure this lockres is a proper candidate for migration
2424 */ 2434 */
2425 spin_lock(&res->spinlock); 2435 spin_lock(&res->spinlock);
2426 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2436 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2427 if (ret < 0) { 2437 if (ret < 0) {
2428 spin_unlock(&res->spinlock); 2438 spin_unlock(&res->spinlock);
2429 goto leave; 2439 goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2431 spin_unlock(&res->spinlock); 2441 spin_unlock(&res->spinlock);
2432 2442
2433 /* no work to do */ 2443 /* no work to do */
2434 if (numlocks == 0) { 2444 if (numlocks == 0 && !hasrefs)
2435 mlog(0, "no locks were found on this lockres! done!\n");
2436 goto leave; 2445 goto leave;
2437 }
2438 2446
2439 /* 2447 /*
2440 * preallocate up front 2448 * preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2459 * find a node to migrate the lockres to 2467 * find a node to migrate the lockres to
2460 */ 2468 */
2461 2469
2462 mlog(0, "picking a migration node\n");
2463 spin_lock(&dlm->spinlock); 2470 spin_lock(&dlm->spinlock);
2464 /* pick a new node */ 2471 /* pick a new node */
2465 if (!test_bit(target, dlm->domain_map) || 2472 if (!test_bit(target, dlm->domain_map) ||
2466 target >= O2NM_MAX_NODES) { 2473 target >= O2NM_MAX_NODES) {
2467 target = dlm_pick_migration_target(dlm, res); 2474 target = dlm_pick_migration_target(dlm, res);
2468 } 2475 }
2469 mlog(0, "node %u chosen for migration\n", target); 2476 mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
2477 namelen, name, target);
2470 2478
2471 if (target >= O2NM_MAX_NODES || 2479 if (target >= O2NM_MAX_NODES ||
2472 !test_bit(target, dlm->domain_map)) { 2480 !test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2667{ 2675{
2668 int ret; 2676 int ret;
2669 int lock_dropped = 0; 2677 int lock_dropped = 0;
2670 int numlocks; 2678 int numlocks, hasrefs;
2671 2679
2672 spin_lock(&res->spinlock); 2680 spin_lock(&res->spinlock);
2673 if (res->owner != dlm->node_num) { 2681 if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2681 } 2689 }
2682 2690
2683 /* No need to migrate a lockres having no locks */ 2691 /* No need to migrate a lockres having no locks */
2684 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2692 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2685 if (ret >= 0 && numlocks == 0) { 2693 if (ret >= 0 && numlocks == 0 && !hasrefs) {
2686 spin_unlock(&res->spinlock); 2694 spin_unlock(&res->spinlock);
2687 goto leave; 2695 goto leave;
2688 } 2696 }
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2915 } 2923 }
2916 queue++; 2924 queue++;
2917 } 2925 }
2926
2927 nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2928 if (nodenum < O2NM_MAX_NODES) {
2929 spin_unlock(&res->spinlock);
2930 return nodenum;
2931 }
2918 spin_unlock(&res->spinlock); 2932 spin_unlock(&res->spinlock);
2919 mlog(0, "have not found a suitable target yet! checking domain map\n"); 2933 mlog(0, "have not found a suitable target yet! checking domain map\n");
2920 2934
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04a2809..f6cba566429d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2241,11 +2241,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2241 2241
2242 mutex_lock(&inode->i_mutex); 2242 mutex_lock(&inode->i_mutex);
2243 2243
2244 ocfs2_iocb_clear_sem_locked(iocb);
2245
2244relock: 2246relock:
2245 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2247 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
2246 if (direct_io) { 2248 if (direct_io) {
2247 down_read(&inode->i_alloc_sem); 2249 down_read(&inode->i_alloc_sem);
2248 have_alloc_sem = 1; 2250 have_alloc_sem = 1;
2251 /* communicate with ocfs2_dio_end_io */
2252 ocfs2_iocb_set_sem_locked(iocb);
2249 } 2253 }
2250 2254
2251 /* 2255 /*
@@ -2382,8 +2386,10 @@ out:
2382 ocfs2_rw_unlock(inode, rw_level); 2386 ocfs2_rw_unlock(inode, rw_level);
2383 2387
2384out_sems: 2388out_sems:
2385 if (have_alloc_sem) 2389 if (have_alloc_sem) {
2386 up_read(&inode->i_alloc_sem); 2390 up_read(&inode->i_alloc_sem);
2391 ocfs2_iocb_clear_sem_locked(iocb);
2392 }
2387 2393
2388 mutex_unlock(&inode->i_mutex); 2394 mutex_unlock(&inode->i_mutex);
2389 2395
@@ -2527,6 +2533,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2527 goto bail; 2533 goto bail;
2528 } 2534 }
2529 2535
2536 ocfs2_iocb_clear_sem_locked(iocb);
2537
2530 /* 2538 /*
2531 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2539 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2532 * need locks to protect pending reads from racing with truncate. 2540 * need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2542,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2534 if (filp->f_flags & O_DIRECT) { 2542 if (filp->f_flags & O_DIRECT) {
2535 down_read(&inode->i_alloc_sem); 2543 down_read(&inode->i_alloc_sem);
2536 have_alloc_sem = 1; 2544 have_alloc_sem = 1;
2545 ocfs2_iocb_set_sem_locked(iocb);
2537 2546
2538 ret = ocfs2_rw_lock(inode, 0); 2547 ret = ocfs2_rw_lock(inode, 0);
2539 if (ret < 0) { 2548 if (ret < 0) {
@@ -2575,8 +2584,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2575 } 2584 }
2576 2585
2577bail: 2586bail:
2578 if (have_alloc_sem) 2587 if (have_alloc_sem) {
2579 up_read(&inode->i_alloc_sem); 2588 up_read(&inode->i_alloc_sem);
2589 ocfs2_iocb_clear_sem_locked(iocb);
2590 }
2580 if (rw_level != -1) 2591 if (rw_level != -1)
2581 ocfs2_rw_unlock(inode, rw_level); 2592 ocfs2_rw_unlock(inode, rw_level);
2582 mlog_exit(ret); 2593 mlog_exit(ret);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c2e4f8222e2f..bf2e7764920e 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -350,7 +350,7 @@ enum {
350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE 350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
351 NUM_SYSTEM_INODES 351 NUM_SYSTEM_INODES
352}; 352};
353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE 353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
354#define NUM_LOCAL_SYSTEM_INODES \ 354#define NUM_LOCAL_SYSTEM_INODES \
355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) 355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
356 356
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 182845147fe4..08cba2c3b612 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = {
1407 1407
1408#endif 1408#endif
1409 1409
1410#ifdef CONFIG_SCHED_AUTOGROUP
1411/*
1412 * Print out autogroup related information:
1413 */
1414static int sched_autogroup_show(struct seq_file *m, void *v)
1415{
1416 struct inode *inode = m->private;
1417 struct task_struct *p;
1418
1419 p = get_proc_task(inode);
1420 if (!p)
1421 return -ESRCH;
1422 proc_sched_autogroup_show_task(p, m);
1423
1424 put_task_struct(p);
1425
1426 return 0;
1427}
1428
1429static ssize_t
1430sched_autogroup_write(struct file *file, const char __user *buf,
1431 size_t count, loff_t *offset)
1432{
1433 struct inode *inode = file->f_path.dentry->d_inode;
1434 struct task_struct *p;
1435 char buffer[PROC_NUMBUF];
1436 long nice;
1437 int err;
1438
1439 memset(buffer, 0, sizeof(buffer));
1440 if (count > sizeof(buffer) - 1)
1441 count = sizeof(buffer) - 1;
1442 if (copy_from_user(buffer, buf, count))
1443 return -EFAULT;
1444
1445 err = strict_strtol(strstrip(buffer), 0, &nice);
1446 if (err)
1447 return -EINVAL;
1448
1449 p = get_proc_task(inode);
1450 if (!p)
1451 return -ESRCH;
1452
1453 err = nice;
1454 err = proc_sched_autogroup_set_nice(p, &err);
1455 if (err)
1456 count = err;
1457
1458 put_task_struct(p);
1459
1460 return count;
1461}
1462
1463static int sched_autogroup_open(struct inode *inode, struct file *filp)
1464{
1465 int ret;
1466
1467 ret = single_open(filp, sched_autogroup_show, NULL);
1468 if (!ret) {
1469 struct seq_file *m = filp->private_data;
1470
1471 m->private = inode;
1472 }
1473 return ret;
1474}
1475
1476static const struct file_operations proc_pid_sched_autogroup_operations = {
1477 .open = sched_autogroup_open,
1478 .read = seq_read,
1479 .write = sched_autogroup_write,
1480 .llseek = seq_lseek,
1481 .release = single_release,
1482};
1483
1484#endif /* CONFIG_SCHED_AUTOGROUP */
1485
1410static ssize_t comm_write(struct file *file, const char __user *buf, 1486static ssize_t comm_write(struct file *file, const char __user *buf,
1411 size_t count, loff_t *offset) 1487 size_t count, loff_t *offset)
1412{ 1488{
@@ -2733,6 +2809,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2733#ifdef CONFIG_SCHED_DEBUG 2809#ifdef CONFIG_SCHED_DEBUG
2734 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2810 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2735#endif 2811#endif
2812#ifdef CONFIG_SCHED_AUTOGROUP
2813 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
2814#endif
2736 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2815 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2737#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2816#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2738 INF("syscall", S_IRUSR, proc_pid_syscall), 2817 INF("syscall", S_IRUSR, proc_pid_syscall),
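Reading /proc/<pid>/autogroup shows the task's autogroup and its nice value; writing a nice level adjusts the group's weight. One small note on the open hook above: single_open()'s third argument is stored in seq_file->private, so the two-step dance of passing NULL and patching m->private afterwards could be written as (equivalent sketch):

static int sched_autogroup_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_autogroup_show, inode);
}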
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 36d57f74cd01..51494e6b5548 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -81,10 +81,10 @@ extern int wait_for_completion_interruptible(struct completion *x);
81extern int wait_for_completion_killable(struct completion *x); 81extern int wait_for_completion_killable(struct completion *x);
82extern unsigned long wait_for_completion_timeout(struct completion *x, 82extern unsigned long wait_for_completion_timeout(struct completion *x,
83 unsigned long timeout); 83 unsigned long timeout);
84extern unsigned long wait_for_completion_interruptible_timeout( 84extern long wait_for_completion_interruptible_timeout(
85 struct completion *x, unsigned long timeout); 85 struct completion *x, unsigned long timeout);
86extern unsigned long wait_for_completion_killable_timeout( 86extern long wait_for_completion_killable_timeout(
87 struct completion *x, unsigned long timeout); 87 struct completion *x, unsigned long timeout);
88extern bool try_wait_for_completion(struct completion *x); 88extern bool try_wait_for_completion(struct completion *x);
89extern bool completion_done(struct completion *x); 89extern bool completion_done(struct completion *x);
90 90
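The switch from unsigned long to long matters because the interruptible and killable variants can return a negative errno; with an unsigned return, -ERESTARTSYS looks like an enormous remaining timeout. Typical caller pattern:

long t = wait_for_completion_interruptible_timeout(&done, HZ);

if (t < 0)		/* interrupted by a signal */
	return t;
if (t == 0)		/* timed out */
	return -ETIMEDOUT;
/* t > 0: completed with t jiffies of the timeout left */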
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 9d8688b92d8b..8cd00ad98d37 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -824,6 +824,8 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
824#ifdef CONFIG_DMA_ENGINE 824#ifdef CONFIG_DMA_ENGINE
825enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); 825enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
826void dma_issue_pending_all(void); 826void dma_issue_pending_all(void);
827struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
828void dma_release_channel(struct dma_chan *chan);
827#else 829#else
828static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) 830static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
829{ 831{
@@ -831,7 +833,14 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript
831} 833}
832static inline void dma_issue_pending_all(void) 834static inline void dma_issue_pending_all(void)
833{ 835{
834 do { } while (0); 836}
837static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask,
838 dma_filter_fn fn, void *fn_param)
839{
840 return NULL;
841}
842static inline void dma_release_channel(struct dma_chan *chan)
843{
835} 844}
836#endif 845#endif
837 846
@@ -842,8 +851,6 @@ void dma_async_device_unregister(struct dma_device *device);
842void dma_run_dependencies(struct dma_async_tx_descriptor *tx); 851void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
843struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type); 852struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
844#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y) 853#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
845struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
846void dma_release_channel(struct dma_chan *chan);
847 854
848/* --- Helper iov-locking functions --- */ 855/* --- Helper iov-locking functions --- */
849 856
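Moving the declarations under CONFIG_DMA_ENGINE and adding stubs means callers no longer need their own ifdefs; with the engine compiled out, a channel request just yields NULL. Usage sketch:

dma_cap_mask_t mask;
struct dma_chan *chan;

dma_cap_zero(mask);
dma_cap_set(DMA_MEMCPY, mask);
chan = dma_request_channel(mask, NULL, NULL);	/* no filter function */
if (chan) {
	/* use the channel, then: */
	dma_release_channel(chan);
} else {
	/* engine not available: fall back to a CPU copy */
}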
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 8beabb958f61..47e3997f7b5c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -154,12 +154,14 @@ enum {
154 TRACE_EVENT_FL_ENABLED_BIT, 154 TRACE_EVENT_FL_ENABLED_BIT,
155 TRACE_EVENT_FL_FILTERED_BIT, 155 TRACE_EVENT_FL_FILTERED_BIT,
156 TRACE_EVENT_FL_RECORDED_CMD_BIT, 156 TRACE_EVENT_FL_RECORDED_CMD_BIT,
157 TRACE_EVENT_FL_CAP_ANY_BIT,
157}; 158};
158 159
159enum { 160enum {
160 TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT), 161 TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT),
161 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), 162 TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
162 TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), 163 TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
164 TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
163}; 165};
164 166
165struct ftrace_event_call { 167struct ftrace_event_call {
@@ -196,6 +198,14 @@ struct ftrace_event_call {
196#endif 198#endif
197}; 199};
198 200
201#define __TRACE_EVENT_FLAGS(name, value) \
202 static int __init trace_init_flags_##name(void) \
203 { \
204 event_##name.flags = value; \
205 return 0; \
206 } \
207 early_initcall(trace_init_flags_##name);
208
199#define PERF_MAX_TRACE_SIZE 2048 209#define PERF_MAX_TRACE_SIZE 2048
200 210
201#define MAX_FILTER_PRED 32 211#define MAX_FILTER_PRED 32
@@ -215,6 +225,10 @@ enum {
215 FILTER_PTR_STRING, 225 FILTER_PTR_STRING,
216}; 226};
217 227
228#define EVENT_STORAGE_SIZE 128
229extern struct mutex event_storage_mutex;
230extern char event_storage[EVENT_STORAGE_SIZE];
231
218extern int trace_event_raw_init(struct ftrace_event_call *call); 232extern int trace_event_raw_init(struct ftrace_event_call *call);
219extern int trace_define_field(struct ftrace_event_call *call, const char *type, 233extern int trace_define_field(struct ftrace_event_call *call, const char *type,
220 const char *name, int offset, int size, 234 const char *name, int offset, int size,
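__TRACE_EVENT_FLAGS() registers an early initcall that patches the generated event's flags. For a hypothetical event "foo", the macro expands to roughly:

static int __init trace_init_flags_foo(void)
{
	event_foo.flags = TRACE_EVENT_FL_CAP_ANY;
	return 0;
}
early_initcall(trace_init_flags_foo);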
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0c1b857d3d..330586ffffbb 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -22,7 +22,7 @@
22#include <linux/wait.h> 22#include <linux/wait.h>
23#include <linux/percpu.h> 23#include <linux/percpu.h>
24#include <linux/timer.h> 24#include <linux/timer.h>
25 25#include <linux/timerqueue.h>
26 26
27struct hrtimer_clock_base; 27struct hrtimer_clock_base;
28struct hrtimer_cpu_base; 28struct hrtimer_cpu_base;
@@ -79,8 +79,8 @@ enum hrtimer_restart {
79 79
80/** 80/**
81 * struct hrtimer - the basic hrtimer structure 81 * struct hrtimer - the basic hrtimer structure
82 * @node: red black tree node for time ordered insertion 82 * @node: timerqueue node, which also manages node.expires,
83 * @_expires: the absolute expiry time in the hrtimers internal 83 * the absolute expiry time in the hrtimers internal
84 * representation. The time is related to the clock on 84 * representation. The time is related to the clock on
85 * which the timer is based. Is setup by adding 85 * which the timer is based. Is setup by adding
86 * slack to the _softexpires value. For non range timers 86 * slack to the _softexpires value. For non range timers
@@ -101,8 +101,7 @@ enum hrtimer_restart {
101 * The hrtimer structure must be initialized by hrtimer_init() 101 * The hrtimer structure must be initialized by hrtimer_init()
102 */ 102 */
103struct hrtimer { 103struct hrtimer {
104 struct rb_node node; 104 struct timerqueue_node node;
105 ktime_t _expires;
106 ktime_t _softexpires; 105 ktime_t _softexpires;
107 enum hrtimer_restart (*function)(struct hrtimer *); 106 enum hrtimer_restart (*function)(struct hrtimer *);
108 struct hrtimer_clock_base *base; 107 struct hrtimer_clock_base *base;
@@ -141,8 +140,7 @@ struct hrtimer_sleeper {
141struct hrtimer_clock_base { 140struct hrtimer_clock_base {
142 struct hrtimer_cpu_base *cpu_base; 141 struct hrtimer_cpu_base *cpu_base;
143 clockid_t index; 142 clockid_t index;
144 struct rb_root active; 143 struct timerqueue_head active;
145 struct rb_node *first;
146 ktime_t resolution; 144 ktime_t resolution;
147 ktime_t (*get_time)(void); 145 ktime_t (*get_time)(void);
148 ktime_t softirq_time; 146 ktime_t softirq_time;
@@ -158,7 +156,6 @@ struct hrtimer_clock_base {
158 * @lock: lock protecting the base and associated clock bases 156 * @lock: lock protecting the base and associated clock bases
159 * and timers 157 * and timers
160 * @clock_base: array of clock bases for this cpu 158 * @clock_base: array of clock bases for this cpu
161 * @curr_timer: the timer which is executing a callback right now
162 * @expires_next: absolute time of the next event which was scheduled 159 * @expires_next: absolute time of the next event which was scheduled
163 * via clock_set_next_event() 160 * via clock_set_next_event()
164 * @hres_active: State of high resolution mode 161 * @hres_active: State of high resolution mode
@@ -184,43 +181,43 @@ struct hrtimer_cpu_base {
184 181
185static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) 182static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
186{ 183{
187 timer->_expires = time; 184 timer->node.expires = time;
188 timer->_softexpires = time; 185 timer->_softexpires = time;
189} 186}
190 187
191static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) 188static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta)
192{ 189{
193 timer->_softexpires = time; 190 timer->_softexpires = time;
194 timer->_expires = ktime_add_safe(time, delta); 191 timer->node.expires = ktime_add_safe(time, delta);
195} 192}
196 193
197static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) 194static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta)
198{ 195{
199 timer->_softexpires = time; 196 timer->_softexpires = time;
200 timer->_expires = ktime_add_safe(time, ns_to_ktime(delta)); 197 timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta));
201} 198}
202 199
203static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) 200static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
204{ 201{
205 timer->_expires.tv64 = tv64; 202 timer->node.expires.tv64 = tv64;
206 timer->_softexpires.tv64 = tv64; 203 timer->_softexpires.tv64 = tv64;
207} 204}
208 205
209static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) 206static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
210{ 207{
211 timer->_expires = ktime_add_safe(timer->_expires, time); 208 timer->node.expires = ktime_add_safe(timer->node.expires, time);
212 timer->_softexpires = ktime_add_safe(timer->_softexpires, time); 209 timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
213} 210}
214 211
215static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) 212static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns)
216{ 213{
217 timer->_expires = ktime_add_ns(timer->_expires, ns); 214 timer->node.expires = ktime_add_ns(timer->node.expires, ns);
218 timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); 215 timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
219} 216}
220 217
221static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) 218static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
222{ 219{
223 return timer->_expires; 220 return timer->node.expires;
224} 221}
225 222
226static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) 223static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
@@ -230,7 +227,7 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
230 227
231static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) 228static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
232{ 229{
233 return timer->_expires.tv64; 230 return timer->node.expires.tv64;
234} 231}
235static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) 232static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
236{ 233{
@@ -239,12 +236,12 @@ static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
239 236
240static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) 237static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
241{ 238{
242 return ktime_to_ns(timer->_expires); 239 return ktime_to_ns(timer->node.expires);
243} 240}
244 241
245static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) 242static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
246{ 243{
247 return ktime_sub(timer->_expires, timer->base->get_time()); 244 return ktime_sub(timer->node.expires, timer->base->get_time());
248} 245}
249 246
250#ifdef CONFIG_HIGH_RES_TIMERS 247#ifdef CONFIG_HIGH_RES_TIMERS
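The hrtimer no longer carries its own rb_node and _expires; both live in the embedded struct timerqueue_node, which keeps the queue sorted and tracks the earliest expiry. Callers are unaffected as long as they go through the accessors, e.g. (illustrative):

struct hrtimer t;

hrtimer_init(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_set_expires(&t, ktime_add_ns(ktime_get(), NSEC_PER_MSEC));
/* hrtimer_get_expires() now reads t.node.expires under the hood */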
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1f8c06ce0fa6..caa151fbebb7 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,13 @@
12#include <linux/securebits.h> 12#include <linux/securebits.h>
13#include <net/net_namespace.h> 13#include <net/net_namespace.h>
14 14
15#ifdef CONFIG_SMP
16# define INIT_PUSHABLE_TASKS(tsk) \
17 .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
18#else
19# define INIT_PUSHABLE_TASKS(tsk)
20#endif
21
15extern struct files_struct init_files; 22extern struct files_struct init_files;
16extern struct fs_struct init_fs; 23extern struct fs_struct init_fs;
17 24
@@ -83,6 +90,12 @@ extern struct group_info init_groups;
83 */ 90 */
84# define CAP_INIT_BSET CAP_FULL_SET 91# define CAP_INIT_BSET CAP_FULL_SET
85 92
93#ifdef CONFIG_RCU_BOOST
94#define INIT_TASK_RCU_BOOST() \
95 .rcu_boost_mutex = NULL,
96#else
97#define INIT_TASK_RCU_BOOST()
98#endif
86#ifdef CONFIG_TREE_PREEMPT_RCU 99#ifdef CONFIG_TREE_PREEMPT_RCU
87#define INIT_TASK_RCU_TREE_PREEMPT() \ 100#define INIT_TASK_RCU_TREE_PREEMPT() \
88 .rcu_blocked_node = NULL, 101 .rcu_blocked_node = NULL,
@@ -94,7 +107,8 @@ extern struct group_info init_groups;
94 .rcu_read_lock_nesting = 0, \ 107 .rcu_read_lock_nesting = 0, \
95 .rcu_read_unlock_special = 0, \ 108 .rcu_read_unlock_special = 0, \
96 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ 109 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
97 INIT_TASK_RCU_TREE_PREEMPT() 110 INIT_TASK_RCU_TREE_PREEMPT() \
111 INIT_TASK_RCU_BOOST()
98#else 112#else
99#define INIT_TASK_RCU_PREEMPT(tsk) 113#define INIT_TASK_RCU_PREEMPT(tsk)
100#endif 114#endif
@@ -137,7 +151,7 @@ extern struct cred init_cred;
137 .nr_cpus_allowed = NR_CPUS, \ 151 .nr_cpus_allowed = NR_CPUS, \
138 }, \ 152 }, \
139 .tasks = LIST_HEAD_INIT(tsk.tasks), \ 153 .tasks = LIST_HEAD_INIT(tsk.tasks), \
140 .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \ 154 INIT_PUSHABLE_TASKS(tsk) \
141 .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ 155 .ptraced = LIST_HEAD_INIT(tsk.ptraced), \
142 .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ 156 .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
143 .real_parent = &tsk, \ 157 .real_parent = &tsk, \
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e7d1b2e0070d..b78edb58ee66 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -275,7 +275,9 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
275extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); 275extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
276extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); 276extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
277extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); 277extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
278extern int arch_optimize_kprobe(struct optimized_kprobe *op); 278extern void arch_optimize_kprobes(struct list_head *oplist);
279extern void arch_unoptimize_kprobes(struct list_head *oplist,
280 struct list_head *done_list);
279extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); 281extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
280extern kprobe_opcode_t *get_optinsn_slot(void); 282extern kprobe_opcode_t *get_optinsn_slot(void);
281extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); 283extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 685ea65eb803..ce0775aa64c3 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -81,16 +81,41 @@ struct kthread_work {
81#define DEFINE_KTHREAD_WORK(work, fn) \ 81#define DEFINE_KTHREAD_WORK(work, fn) \
82 struct kthread_work work = KTHREAD_WORK_INIT(work, fn) 82 struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
83 83
84static inline void init_kthread_worker(struct kthread_worker *worker)
85{
86 *worker = (struct kthread_worker)KTHREAD_WORKER_INIT(*worker);
87}
88
89static inline void init_kthread_work(struct kthread_work *work,
90 kthread_work_func_t fn)
91{
92 *work = (struct kthread_work)KTHREAD_WORK_INIT(*work, fn);
93}
84/*
85 * kthread_worker.lock and kthread_work.done need their own lockdep class
86 * keys if they are defined on stack with lockdep enabled. Use the
87 * following macros when defining them on stack.
88 */
89#ifdef CONFIG_LOCKDEP
90# define KTHREAD_WORKER_INIT_ONSTACK(worker) \
91 ({ init_kthread_worker(&worker); worker; })
92# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \
93 struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker)
94# define KTHREAD_WORK_INIT_ONSTACK(work, fn) \
95 ({ init_kthread_work((&work), fn); work; })
96# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) \
97 struct kthread_work work = KTHREAD_WORK_INIT_ONSTACK(work, fn)
98#else
99# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker)
100# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) DEFINE_KTHREAD_WORK(work, fn)
101#endif
102
103extern void __init_kthread_worker(struct kthread_worker *worker,
104 const char *name, struct lock_class_key *key);
105
106#define init_kthread_worker(worker) \
107 do { \
108 static struct lock_class_key __key; \
109 __init_kthread_worker((worker), "("#worker")->lock", &__key); \
110 } while (0)
111
112#define init_kthread_work(work, fn) \
113 do { \
114 memset((work), 0, sizeof(struct kthread_work)); \
115 INIT_LIST_HEAD(&(work)->node); \
116 (work)->func = (fn); \
117 init_waitqueue_head(&(work)->done); \
118 } while (0)
94 119
95int kthread_worker_fn(void *worker_ptr); 120int kthread_worker_fn(void *worker_ptr);
96 121
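A minimal usage sketch of the ONSTACK variants above, assuming a worker thread is already running kthread_worker_fn(); my_work_fn and queue_onstack_work are invented names, not part of this patch:

#include <linux/kthread.h>

static void my_work_fn(struct kthread_work *work)
{
	/* runs in the worker thread */
}

static void queue_onstack_work(struct kthread_worker *worker)
{
	/* picks up a distinct lockdep class key, unlike a plain
	 * on-stack kthread_work initialized by hand */
	DEFINE_KTHREAD_WORK_ONSTACK(work, my_work_fn);

	queue_kthread_work(worker, &work);
	flush_kthread_work(&work);	/* must finish before 'work' leaves scope */
}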
diff --git a/include/linux/module.h b/include/linux/module.h
index 7575bbbdf2a2..8b17fd8c790d 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -308,6 +308,9 @@ struct module
308 /* The size of the executable code in each section. */ 308 /* The size of the executable code in each section. */
309 unsigned int init_text_size, core_text_size; 309 unsigned int init_text_size, core_text_size;
310 310
311 /* Size of RO sections of the module (text+rodata) */
312 unsigned int init_ro_size, core_ro_size;
313
311 /* Arch-specific module values */ 314 /* Arch-specific module values */
312 struct mod_arch_specific arch; 315 struct mod_arch_specific arch;
313 316
@@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
672{ 675{
673 return 0; 676 return 0;
674} 677}
675
676#endif /* CONFIG_MODULES */ 678#endif /* CONFIG_MODULES */
677 679
678#ifdef CONFIG_SYSFS 680#ifdef CONFIG_SYSFS
@@ -687,6 +689,13 @@ extern int module_sysfs_initialized;
687 689
688#define __MODULE_STRING(x) __stringify(x) 690#define __MODULE_STRING(x) __stringify(x)
689 691
692#ifdef CONFIG_DEBUG_SET_MODULE_RONX
693extern void set_all_modules_text_rw(void);
694extern void set_all_modules_text_ro(void);
695#else
696static inline void set_all_modules_text_rw(void) { }
697static inline void set_all_modules_text_ro(void) { }
698#endif
690 699
691#ifdef CONFIG_GENERIC_BUG 700#ifdef CONFIG_GENERIC_BUG
692void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, 701void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f363bc8fdc74..94b48bd40dd7 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock);
160extern void mutex_unlock(struct mutex *lock); 160extern void mutex_unlock(struct mutex *lock);
161extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); 161extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
162 162
163#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
164#define arch_mutex_cpu_relax() cpu_relax()
165#endif
166
163#endif 167#endif
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 123566912d73..e2b9e63afa68 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -70,7 +70,7 @@ struct nlmsghdr {
70 Check NLM_F_EXCL 70 Check NLM_F_EXCL
71 */ 71 */
72 72
73#define NLMSG_ALIGNTO 4
73#define NLMSG_ALIGNTO 4U
74#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) 74#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
75#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) 75#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
76#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN)) 76#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
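NLMSG_ALIGN() rounds a message length up to the next multiple of NLMSG_ALIGNTO; making the constant 4U keeps the whole expression unsigned so it mixes safely with size_t and other unsigned lengths. A standalone sketch of the arithmetic (plain C, not kernel code):

#include <stdio.h>

#define NLMSG_ALIGNTO	4U
#define NLMSG_ALIGN(len) (((len) + NLMSG_ALIGNTO - 1) & ~(NLMSG_ALIGNTO - 1))

int main(void)
{
	/* round up to the next multiple of 4: 1 -> 4, 4 -> 4, 17 -> 20 */
	printf("%u %u %u\n", NLMSG_ALIGN(1U), NLMSG_ALIGN(4U), NLMSG_ALIGN(17U));
	return 0;
}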
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 06aab5eee134..c536f8545f74 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -14,22 +14,14 @@
14 * may be used to reset the timeout - for code which intentionally 14 * may be used to reset the timeout - for code which intentionally
15 * disables interrupts for a long time. This call is stateless. 15 * disables interrupts for a long time. This call is stateless.
16 */ 16 */
17#ifdef ARCH_HAS_NMI_WATCHDOG
17#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
18#include <asm/nmi.h> 18#include <asm/nmi.h>
19extern void touch_nmi_watchdog(void); 19extern void touch_nmi_watchdog(void);
20extern void acpi_nmi_disable(void);
21extern void acpi_nmi_enable(void);
22#else 20#else
23#ifndef CONFIG_HARDLOCKUP_DETECTOR
24static inline void touch_nmi_watchdog(void) 21static inline void touch_nmi_watchdog(void)
25{ 22{
26 touch_softlockup_watchdog(); 23 touch_softlockup_watchdog();
27} 24}
28#else
29extern void touch_nmi_watchdog(void);
30#endif
31static inline void acpi_nmi_disable(void) { }
32static inline void acpi_nmi_enable(void) { }
33#endif 25#endif
34 26
35/* 27/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4f1279e105ee..dda5b0a3ff60 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -215,8 +215,9 @@ struct perf_event_attr {
215 */ 215 */
216 precise_ip : 2, /* skid constraint */ 216 precise_ip : 2, /* skid constraint */
217 mmap_data : 1, /* non-exec mmap data */ 217 mmap_data : 1, /* non-exec mmap data */
218 sample_id_all : 1, /* sample_type all events */
218 219
219 __reserved_1 : 46;
220 __reserved_1 : 45;
220 221
221 union { 222 union {
222 __u32 wakeup_events; /* wakeup every n events */ 223 __u32 wakeup_events; /* wakeup every n events */
@@ -327,6 +328,15 @@ struct perf_event_header {
327enum perf_event_type { 328enum perf_event_type {
328 329
329 /* 330 /*
331 * If perf_event_attr.sample_id_all is set then all event types will
332 * have the sample_type selected fields related to where/when
333 * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID)
334 * described in PERF_RECORD_SAMPLE below; they will be stashed just after
335 * the perf_event_header and the fields already present in the record,
336 * i.e. at the end of the payload. That way a newer perf.data
337 * file will be supported by older perf tools, with these new optional
338 * fields being ignored.
339 *
330 * The MMAP events record the PROT_EXEC mappings so that we can 340 * The MMAP events record the PROT_EXEC mappings so that we can
331 * correlate userspace IPs to code. They have the following structure: 341 * correlate userspace IPs to code. They have the following structure:
332 * 342 *
@@ -578,6 +588,10 @@ struct perf_event;
578struct pmu { 588struct pmu {
579 struct list_head entry; 589 struct list_head entry;
580 590
591 struct device *dev;
592 char *name;
593 int type;
594
581 int * __percpu pmu_disable_count; 595 int * __percpu pmu_disable_count;
582 struct perf_cpu_context * __percpu pmu_cpu_context; 596 struct perf_cpu_context * __percpu pmu_cpu_context;
583 int task_ctx_nr; 597 int task_ctx_nr;
@@ -758,6 +772,9 @@ struct perf_event {
758 u64 shadow_ctx_time; 772 u64 shadow_ctx_time;
759 773
760 struct perf_event_attr attr; 774 struct perf_event_attr attr;
775 u16 header_size;
776 u16 id_header_size;
777 u16 read_size;
761 struct hw_perf_event hw; 778 struct hw_perf_event hw;
762 779
763 struct perf_event_context *ctx; 780 struct perf_event_context *ctx;
@@ -903,7 +920,7 @@ struct perf_output_handle {
903 920
904#ifdef CONFIG_PERF_EVENTS 921#ifdef CONFIG_PERF_EVENTS
905 922
906extern int perf_pmu_register(struct pmu *pmu);
923extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
907extern void perf_pmu_unregister(struct pmu *pmu); 924extern void perf_pmu_unregister(struct pmu *pmu);
908 925
909extern int perf_num_counters(void); 926extern int perf_num_counters(void);
@@ -970,6 +987,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
970 struct perf_sample_data *data, 987 struct perf_sample_data *data,
971 struct pt_regs *regs); 988 struct pt_regs *regs);
972 989
990static inline bool is_sampling_event(struct perf_event *event)
991{
992 return event->attr.sample_period != 0;
993}
994
973/* 995/*
974 * Return 1 for a software event, 0 for a hardware event 996 * Return 1 for a software event, 0 for a hardware event
975 */ 997 */
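The new is_sampling_event() helper gives PMU code one test for "does this event carry a sample period". A hypothetical sketch of how a counting-only PMU might use it in its event_init hook (mypmu is an invented name):

static int mypmu_event_init(struct perf_event *event)
{
	if (is_sampling_event(event))
		return -EINVAL;	/* this PMU counts only, no interrupt-driven samples */
	return 0;
}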
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index f31ef61f1c65..2dea94fc4402 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -241,11 +241,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
241#define list_first_entry_rcu(ptr, type, member) \ 241#define list_first_entry_rcu(ptr, type, member) \
242 list_entry_rcu((ptr)->next, type, member) 242 list_entry_rcu((ptr)->next, type, member)
243 243
244#define __list_for_each_rcu(pos, head) \
245 for (pos = rcu_dereference_raw(list_next_rcu(head)); \
246 pos != (head); \
247 pos = rcu_dereference_raw(list_next_rcu((pos)))
248
249/** 244/**
250 * list_for_each_entry_rcu - iterate over rcu list of given type 245 * list_for_each_entry_rcu - iterate over rcu list of given type
251 * @pos: the type * to use as a loop cursor. 246 * @pos: the type * to use as a loop cursor.
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 03cda7bed985..af5614856285 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -47,6 +47,8 @@
47extern int rcutorture_runnable; /* for sysctl */ 47extern int rcutorture_runnable; /* for sysctl */
48#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 48#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
49 49
50#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))
51#define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b))
50#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) 52#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
51#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) 53#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
52 54
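The UINT_CMP_*() macros compare free-running counters modulo 2^32, so the ordering survives wraparound as long as the two values are less than half the counter space apart. A standalone demonstration:

#include <limits.h>
#include <stdio.h>

#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))
#define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned int snap = UINT_MAX - 1;	/* counter just before wrap */
	unsigned int cur = snap + 3;		/* wrapped around to 1 */

	/* a plain >= is fooled by the wrap; the modular compare is not */
	printf("plain=%d modular=%d\n", cur >= snap, UINT_CMP_GE(cur, snap));
	return 0;	/* prints "plain=0 modular=1" */
}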
@@ -66,7 +68,6 @@ extern void call_rcu_sched(struct rcu_head *head,
66extern void synchronize_sched(void); 68extern void synchronize_sched(void);
67extern void rcu_barrier_bh(void); 69extern void rcu_barrier_bh(void);
68extern void rcu_barrier_sched(void); 70extern void rcu_barrier_sched(void);
69extern void synchronize_sched_expedited(void);
70extern int sched_expedited_torture_stats(char *page); 71extern int sched_expedited_torture_stats(char *page);
71 72
72static inline void __rcu_read_lock_bh(void) 73static inline void __rcu_read_lock_bh(void)
@@ -118,7 +119,6 @@ static inline int rcu_preempt_depth(void)
118#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 119#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
119 120
120/* Internal to kernel */ 121/* Internal to kernel */
121extern void rcu_init(void);
122extern void rcu_sched_qs(int cpu); 122extern void rcu_sched_qs(int cpu);
123extern void rcu_bh_qs(int cpu); 123extern void rcu_bh_qs(int cpu);
124extern void rcu_check_callbacks(int cpu, int user); 124extern void rcu_check_callbacks(int cpu, int user);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 13877cb93a60..30ebd7c8d874 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,7 +27,9 @@
27 27
28#include <linux/cache.h> 28#include <linux/cache.h>
29 29
30#define rcu_init_sched() do { } while (0)
30static inline void rcu_init(void)
31{
32}
31 33
32#ifdef CONFIG_TINY_RCU 34#ifdef CONFIG_TINY_RCU
33 35
@@ -58,6 +60,11 @@ static inline void synchronize_rcu_bh_expedited(void)
58 synchronize_sched(); 60 synchronize_sched();
59} 61}
60 62
63static inline void synchronize_sched_expedited(void)
64{
65 synchronize_sched();
66}
67
61#ifdef CONFIG_TINY_RCU 68#ifdef CONFIG_TINY_RCU
62 69
63static inline void rcu_preempt_note_context_switch(void) 70static inline void rcu_preempt_note_context_switch(void)
@@ -125,16 +132,12 @@ static inline void rcu_cpu_stall_reset(void)
125} 132}
126 133
127#ifdef CONFIG_DEBUG_LOCK_ALLOC 134#ifdef CONFIG_DEBUG_LOCK_ALLOC
128
129extern int rcu_scheduler_active __read_mostly; 135extern int rcu_scheduler_active __read_mostly;
130extern void rcu_scheduler_starting(void); 136extern void rcu_scheduler_starting(void);
131
132#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 137#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
133
134static inline void rcu_scheduler_starting(void) 138static inline void rcu_scheduler_starting(void)
135{ 139{
136} 140}
137
138#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 141#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
139 142
140#endif /* __LINUX_RCUTINY_H */ 143#endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 95518e628794..3a933482734a 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,6 +30,7 @@
30#ifndef __LINUX_RCUTREE_H 30#ifndef __LINUX_RCUTREE_H
31#define __LINUX_RCUTREE_H 31#define __LINUX_RCUTREE_H
32 32
33extern void rcu_init(void);
33extern void rcu_note_context_switch(int cpu); 34extern void rcu_note_context_switch(int cpu);
34extern int rcu_needs_cpu(int cpu); 35extern int rcu_needs_cpu(int cpu);
35extern void rcu_cpu_stall_reset(void); 36extern void rcu_cpu_stall_reset(void);
@@ -47,6 +48,7 @@ static inline void exit_rcu(void)
47#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ 48#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
48 49
49extern void synchronize_rcu_bh(void); 50extern void synchronize_rcu_bh(void);
51extern void synchronize_sched_expedited(void);
50extern void synchronize_rcu_expedited(void); 52extern void synchronize_rcu_expedited(void);
51 53
52static inline void synchronize_rcu_bh_expedited(void) 54static inline void synchronize_rcu_bh_expedited(void)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 223874538b33..777cd01e240e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
316 size_t *lenp, loff_t *ppos); 316 size_t *lenp, loff_t *ppos);
317extern unsigned int softlockup_panic; 317extern unsigned int softlockup_panic;
318extern int softlockup_thresh; 318extern int softlockup_thresh;
319void lockup_detector_init(void);
319#else 320#else
320static inline void touch_softlockup_watchdog(void) 321static inline void touch_softlockup_watchdog(void)
321{ 322{
@@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void)
326static inline void touch_all_softlockup_watchdogs(void) 327static inline void touch_all_softlockup_watchdogs(void)
327{ 328{
328} 329}
330static inline void lockup_detector_init(void)
331{
332}
329#endif 333#endif
330 334
331#ifdef CONFIG_DETECT_HUNG_TASK 335#ifdef CONFIG_DETECT_HUNG_TASK
@@ -509,6 +513,8 @@ struct thread_group_cputimer {
509 spinlock_t lock; 513 spinlock_t lock;
510}; 514};
511 515
516struct autogroup;
517
512/* 518/*
513 * NOTE! "signal_struct" does not have it's own 519 * NOTE! "signal_struct" does not have it's own
514 * locking, because a shared signal_struct always 520 * locking, because a shared signal_struct always
@@ -576,6 +582,9 @@ struct signal_struct {
576 582
577 struct tty_struct *tty; /* NULL if no tty */ 583 struct tty_struct *tty; /* NULL if no tty */
578 584
585#ifdef CONFIG_SCHED_AUTOGROUP
586 struct autogroup *autogroup;
587#endif
579 /* 588 /*
580 * Cumulative resource counters for dead threads in the group, 589 * Cumulative resource counters for dead threads in the group,
581 * and for reaped dead child processes forked by this group. 590 * and for reaped dead child processes forked by this group.
@@ -1229,13 +1238,18 @@ struct task_struct {
1229#ifdef CONFIG_TREE_PREEMPT_RCU 1238#ifdef CONFIG_TREE_PREEMPT_RCU
1230 struct rcu_node *rcu_blocked_node; 1239 struct rcu_node *rcu_blocked_node;
1231#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1240#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1241#ifdef CONFIG_RCU_BOOST
1242 struct rt_mutex *rcu_boost_mutex;
1243#endif /* #ifdef CONFIG_RCU_BOOST */
1232 1244
1233#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1245#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1234 struct sched_info sched_info; 1246 struct sched_info sched_info;
1235#endif 1247#endif
1236 1248
1237 struct list_head tasks; 1249 struct list_head tasks;
1250#ifdef CONFIG_SMP
1238 struct plist_node pushable_tasks; 1251 struct plist_node pushable_tasks;
1252#endif
1239 1253
1240 struct mm_struct *mm, *active_mm; 1254 struct mm_struct *mm, *active_mm;
1241#if defined(SPLIT_RSS_COUNTING) 1255#if defined(SPLIT_RSS_COUNTING)
@@ -1759,7 +1773,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1759#ifdef CONFIG_PREEMPT_RCU 1773#ifdef CONFIG_PREEMPT_RCU
1760 1774
1761#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ 1775#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
1762#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
1776#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */
1777#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */
1763 1778
1764static inline void rcu_copy_process(struct task_struct *p) 1779static inline void rcu_copy_process(struct task_struct *p)
1765{ 1780{
@@ -1767,7 +1782,10 @@ static inline void rcu_copy_process(struct task_struct *p)
1767 p->rcu_read_unlock_special = 0; 1782 p->rcu_read_unlock_special = 0;
1768#ifdef CONFIG_TREE_PREEMPT_RCU 1783#ifdef CONFIG_TREE_PREEMPT_RCU
1769 p->rcu_blocked_node = NULL; 1784 p->rcu_blocked_node = NULL;
1770#endif
1785#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1786#ifdef CONFIG_RCU_BOOST
1787 p->rcu_boost_mutex = NULL;
1788#endif /* #ifdef CONFIG_RCU_BOOST */
1771 INIT_LIST_HEAD(&p->rcu_node_entry); 1789 INIT_LIST_HEAD(&p->rcu_node_entry);
1772} 1790}
1773 1791
@@ -1872,14 +1890,11 @@ extern void sched_clock_idle_sleep_event(void);
1872extern void sched_clock_idle_wakeup_event(u64 delta_ns); 1890extern void sched_clock_idle_wakeup_event(u64 delta_ns);
1873 1891
1874#ifdef CONFIG_HOTPLUG_CPU 1892#ifdef CONFIG_HOTPLUG_CPU
1875extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
1876extern void idle_task_exit(void); 1893extern void idle_task_exit(void);
1877#else 1894#else
1878static inline void idle_task_exit(void) {} 1895static inline void idle_task_exit(void) {}
1879#endif 1896#endif
1880 1897
1881extern void sched_idle_next(void);
1882
1883#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) 1898#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
1884extern void wake_up_idle_cpu(int cpu); 1899extern void wake_up_idle_cpu(int cpu);
1885#else 1900#else
@@ -1889,8 +1904,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
1889extern unsigned int sysctl_sched_latency; 1904extern unsigned int sysctl_sched_latency;
1890extern unsigned int sysctl_sched_min_granularity; 1905extern unsigned int sysctl_sched_min_granularity;
1891extern unsigned int sysctl_sched_wakeup_granularity; 1906extern unsigned int sysctl_sched_wakeup_granularity;
1892extern unsigned int sysctl_sched_shares_ratelimit;
1893extern unsigned int sysctl_sched_shares_thresh;
1894extern unsigned int sysctl_sched_child_runs_first; 1907extern unsigned int sysctl_sched_child_runs_first;
1895 1908
1896enum sched_tunable_scaling { 1909enum sched_tunable_scaling {
@@ -1906,6 +1919,7 @@ extern unsigned int sysctl_sched_migration_cost;
1906extern unsigned int sysctl_sched_nr_migrate; 1919extern unsigned int sysctl_sched_nr_migrate;
1907extern unsigned int sysctl_sched_time_avg; 1920extern unsigned int sysctl_sched_time_avg;
1908extern unsigned int sysctl_timer_migration; 1921extern unsigned int sysctl_timer_migration;
1922extern unsigned int sysctl_sched_shares_window;
1909 1923
1910int sched_proc_update_handler(struct ctl_table *table, int write, 1924int sched_proc_update_handler(struct ctl_table *table, int write,
1911 void __user *buffer, size_t *length, 1925 void __user *buffer, size_t *length,
@@ -1931,6 +1945,24 @@ int sched_rt_handler(struct ctl_table *table, int write,
1931 1945
1932extern unsigned int sysctl_sched_compat_yield; 1946extern unsigned int sysctl_sched_compat_yield;
1933 1947
1948#ifdef CONFIG_SCHED_AUTOGROUP
1949extern unsigned int sysctl_sched_autogroup_enabled;
1950
1951extern void sched_autogroup_create_attach(struct task_struct *p);
1952extern void sched_autogroup_detach(struct task_struct *p);
1953extern void sched_autogroup_fork(struct signal_struct *sig);
1954extern void sched_autogroup_exit(struct signal_struct *sig);
1955#ifdef CONFIG_PROC_FS
1956extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
1957extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
1958#endif
1959#else
1960static inline void sched_autogroup_create_attach(struct task_struct *p) { }
1961static inline void sched_autogroup_detach(struct task_struct *p) { }
1962static inline void sched_autogroup_fork(struct signal_struct *sig) { }
1963static inline void sched_autogroup_exit(struct signal_struct *sig) { }
1964#endif
1965
1934#ifdef CONFIG_RT_MUTEXES 1966#ifdef CONFIG_RT_MUTEXES
1935extern int rt_mutex_getprio(struct task_struct *p); 1967extern int rt_mutex_getprio(struct task_struct *p);
1936extern void rt_mutex_setprio(struct task_struct *p, int prio); 1968extern void rt_mutex_setprio(struct task_struct *p, int prio);
@@ -1949,9 +1981,10 @@ extern int task_nice(const struct task_struct *p);
1949extern int can_nice(const struct task_struct *p, const int nice); 1981extern int can_nice(const struct task_struct *p, const int nice);
1950extern int task_curr(const struct task_struct *p); 1982extern int task_curr(const struct task_struct *p);
1951extern int idle_cpu(int cpu); 1983extern int idle_cpu(int cpu);
1952extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
1984extern int sched_setscheduler(struct task_struct *, int,
1985 const struct sched_param *);
1953extern int sched_setscheduler_nocheck(struct task_struct *, int, 1986extern int sched_setscheduler_nocheck(struct task_struct *, int,
1954 struct sched_param *);
1987 const struct sched_param *);
1955extern struct task_struct *idle_task(int cpu); 1988extern struct task_struct *idle_task(int cpu);
1956extern struct task_struct *curr_task(int cpu); 1989extern struct task_struct *curr_task(int cpu);
1957extern void set_curr_task(int cpu, struct task_struct *p); 1990extern void set_curr_task(int cpu, struct task_struct *p);
diff --git a/include/linux/sfi.h b/include/linux/sfi.h
index 7f770c638e99..fe817918b30e 100644
--- a/include/linux/sfi.h
+++ b/include/linux/sfi.h
@@ -77,6 +77,8 @@
77#define SFI_OEM_ID_SIZE 6 77#define SFI_OEM_ID_SIZE 6
78#define SFI_OEM_TABLE_ID_SIZE 8 78#define SFI_OEM_TABLE_ID_SIZE 8
79 79
80#define SFI_NAME_LEN 16
81
80#define SFI_SYST_SEARCH_BEGIN 0x000E0000 82#define SFI_SYST_SEARCH_BEGIN 0x000E0000
81#define SFI_SYST_SEARCH_END 0x000FFFFF 83#define SFI_SYST_SEARCH_END 0x000FFFFF
82 84
@@ -156,13 +158,13 @@ struct sfi_device_table_entry {
156 u16 addr; 158 u16 addr;
157 u8 irq; 159 u8 irq;
158 u32 max_freq; 160 u32 max_freq;
159 char name[16];
161 char name[SFI_NAME_LEN];
160} __packed; 162} __packed;
161 163
162struct sfi_gpio_table_entry { 164struct sfi_gpio_table_entry {
163 char controller_name[16];
165 char controller_name[SFI_NAME_LEN];
164 u16 pin_no; 166 u16 pin_no;
165 char pin_name[16];
167 char pin_name[SFI_NAME_LEN];
166} __packed; 168} __packed;
167 169
168typedef int (*sfi_table_handler) (struct sfi_table_header *table); 170typedef int (*sfi_table_handler) (struct sfi_table_header *table);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 51efbef38fb0..25310f1d7f37 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -2,6 +2,7 @@
2#define __LINUX_STACKTRACE_H 2#define __LINUX_STACKTRACE_H
3 3
4struct task_struct; 4struct task_struct;
5struct pt_regs;
5 6
6#ifdef CONFIG_STACKTRACE 7#ifdef CONFIG_STACKTRACE
7struct task_struct; 8struct task_struct;
@@ -13,7 +14,8 @@ struct stack_trace {
13}; 14};
14 15
15extern void save_stack_trace(struct stack_trace *trace); 16extern void save_stack_trace(struct stack_trace *trace);
16extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp);
17extern void save_stack_trace_regs(struct stack_trace *trace,
18 struct pt_regs *regs);
17extern void save_stack_trace_tsk(struct task_struct *tsk, 19extern void save_stack_trace_tsk(struct task_struct *tsk,
18 struct stack_trace *trace); 20 struct stack_trace *trace);
19 21
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index cacc27a0e285..18cd0684fc4e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
127#define SYSCALL_TRACE_ENTER_EVENT(sname) \ 127#define SYSCALL_TRACE_ENTER_EVENT(sname) \
128 static struct syscall_metadata \ 128 static struct syscall_metadata \
129 __attribute__((__aligned__(4))) __syscall_meta_##sname; \ 129 __attribute__((__aligned__(4))) __syscall_meta_##sname; \
130 static struct ftrace_event_call \
131 __attribute__((__aligned__(4))) event_enter_##sname; \
132 static struct ftrace_event_call __used \ 130 static struct ftrace_event_call __used \
133 __attribute__((__aligned__(4))) \ 131 __attribute__((__aligned__(4))) \
134 __attribute__((section("_ftrace_events"))) \ 132 __attribute__((section("_ftrace_events"))) \
@@ -137,13 +135,12 @@ extern struct trace_event_functions exit_syscall_print_funcs;
137 .class = &event_class_syscall_enter, \ 135 .class = &event_class_syscall_enter, \
138 .event.funcs = &enter_syscall_print_funcs, \ 136 .event.funcs = &enter_syscall_print_funcs, \
139 .data = (void *)&__syscall_meta_##sname,\ 137 .data = (void *)&__syscall_meta_##sname,\
140 }
138 }; \
139 __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
141 140
142#define SYSCALL_TRACE_EXIT_EVENT(sname) \ 141#define SYSCALL_TRACE_EXIT_EVENT(sname) \
143 static struct syscall_metadata \ 142 static struct syscall_metadata \
144 __attribute__((__aligned__(4))) __syscall_meta_##sname; \ 143 __attribute__((__aligned__(4))) __syscall_meta_##sname; \
145 static struct ftrace_event_call \
146 __attribute__((__aligned__(4))) event_exit_##sname; \
147 static struct ftrace_event_call __used \ 144 static struct ftrace_event_call __used \
148 __attribute__((__aligned__(4))) \ 145 __attribute__((__aligned__(4))) \
149 __attribute__((section("_ftrace_events"))) \ 146 __attribute__((section("_ftrace_events"))) \
@@ -152,7 +149,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
152 .class = &event_class_syscall_exit, \ 149 .class = &event_class_syscall_exit, \
153 .event.funcs = &exit_syscall_print_funcs, \ 150 .event.funcs = &exit_syscall_print_funcs, \
154 .data = (void *)&__syscall_meta_##sname,\ 151 .data = (void *)&__syscall_meta_##sname,\
155 }
152 }; \
153 __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
156 154
157#define SYSCALL_METADATA(sname, nb) \ 155#define SYSCALL_METADATA(sname, nb) \
158 SYSCALL_TRACE_ENTER_EVENT(sname); \ 156 SYSCALL_TRACE_ENTER_EVENT(sname); \
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 341dddb55090..2466e550a41d 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -33,7 +33,7 @@
33 */ 33 */
34 34
35 35
36#define TASKSTATS_VERSION 7
36#define TASKSTATS_VERSION 8
37#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN 37#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
38 * in linux/sched.h */ 38 * in linux/sched.h */
39 39
@@ -188,6 +188,7 @@ enum {
188 TASKSTATS_TYPE_STATS, /* taskstats structure */ 188 TASKSTATS_TYPE_STATS, /* taskstats structure */
189 TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ 189 TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */
190 TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ 190 TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */
191 TASKSTATS_TYPE_NULL, /* contains nothing */
191 __TASKSTATS_TYPE_MAX, 192 __TASKSTATS_TYPE_MAX,
192}; 193};
193 194
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 38cf093ef62c..6abd9138beda 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -24,9 +24,9 @@ struct timer_list {
24 int slack; 24 int slack;
25 25
26#ifdef CONFIG_TIMER_STATS 26#ifdef CONFIG_TIMER_STATS
27 int start_pid;
27 void *start_site; 28 void *start_site;
28 char start_comm[16]; 29 char start_comm[16];
29 int start_pid;
30#endif 30#endif
31#ifdef CONFIG_LOCKDEP 31#ifdef CONFIG_LOCKDEP
32 struct lockdep_map lockdep_map; 32 struct lockdep_map lockdep_map;
@@ -48,12 +48,38 @@ extern struct tvec_base boot_tvec_bases;
48#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) 48#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
49#endif 49#endif
50 50
51/*
52 * Note that all tvec_bases are 2 byte aligned and lower bit of
53 * base in timer_list is guaranteed to be zero. Use the LSB to
54 * indicate whether the timer is deferrable.
55 *
56 * A deferrable timer will work normally when the system is busy, but
57 * will not cause a CPU to come out of idle just to service it; instead,
58 * the timer will be serviced when the CPU eventually wakes up with a
59 * subsequent non-deferrable timer.
60 */
61#define TBASE_DEFERRABLE_FLAG (0x1)
62
51#define TIMER_INITIALIZER(_function, _expires, _data) { \ 63#define TIMER_INITIALIZER(_function, _expires, _data) { \
52 .entry = { .prev = TIMER_ENTRY_STATIC }, \ 64 .entry = { .prev = TIMER_ENTRY_STATIC }, \
53 .function = (_function), \ 65 .function = (_function), \
54 .expires = (_expires), \ 66 .expires = (_expires), \
55 .data = (_data), \ 67 .data = (_data), \
56 .base = &boot_tvec_bases, \ 68 .base = &boot_tvec_bases, \
69 .slack = -1, \
70 __TIMER_LOCKDEP_MAP_INITIALIZER( \
71 __FILE__ ":" __stringify(__LINE__)) \
72 }
73
74#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \
75 ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG))
76
77#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\
78 .entry = { .prev = TIMER_ENTRY_STATIC }, \
79 .function = (_function), \
80 .expires = (_expires), \
81 .data = (_data), \
82 .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \
57 __TIMER_LOCKDEP_MAP_INITIALIZER( \ 83 __TIMER_LOCKDEP_MAP_INITIALIZER( \
58 __FILE__ ":" __stringify(__LINE__)) \ 84 __FILE__ ":" __stringify(__LINE__)) \
59 } 85 }
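The deferrable trick relies on the base pointer being at least 2-byte aligned, leaving bit 0 free as a tag. A standalone sketch of the same tagging idea; the helper names are modeled loosely on kernel/timer.c and are not declared by this header:

#include <stdint.h>
#include <stdio.h>

#define TBASE_DEFERRABLE_FLAG 0x1UL

static int tbase_get_deferrable(const void *base)
{
	return (uintptr_t)base & TBASE_DEFERRABLE_FLAG;
}

static void *tbase_get_base(const void *base)
{
	return (void *)((uintptr_t)base & ~TBASE_DEFERRABLE_FLAG);
}

int main(void)
{
	static long real_base;	/* stand-in for a tvec_base, suitably aligned */
	void *tagged = (char *)&real_base + TBASE_DEFERRABLE_FLAG;

	printf("deferrable=%d recovered=%d\n",
	       tbase_get_deferrable(tagged),
	       tbase_get_base(tagged) == (void *)&real_base);
	return 0;	/* prints "deferrable=1 recovered=1" */
}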
@@ -248,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
248 274
249extern void add_timer(struct timer_list *timer); 275extern void add_timer(struct timer_list *timer);
250 276
277extern int try_to_del_timer_sync(struct timer_list *timer);
278
251#ifdef CONFIG_SMP 279#ifdef CONFIG_SMP
252 extern int try_to_del_timer_sync(struct timer_list *timer);
253 extern int del_timer_sync(struct timer_list *timer); 280 extern int del_timer_sync(struct timer_list *timer);
254#else 281#else
255# define try_to_del_timer_sync(t) del_timer(t)
256# define del_timer_sync(t) del_timer(t) 282# define del_timer_sync(t) del_timer(t)
257#endif 283#endif
258 284
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
new file mode 100644
index 000000000000..d24aabaca474
--- /dev/null
+++ b/include/linux/timerqueue.h
@@ -0,0 +1,50 @@
1#ifndef _LINUX_TIMERQUEUE_H
2#define _LINUX_TIMERQUEUE_H
3
4#include <linux/rbtree.h>
5#include <linux/ktime.h>
6
7
8struct timerqueue_node {
9 struct rb_node node;
10 ktime_t expires;
11};
12
13struct timerqueue_head {
14 struct rb_root head;
15 struct timerqueue_node *next;
16};
17
18
19extern void timerqueue_add(struct timerqueue_head *head,
20 struct timerqueue_node *node);
21extern void timerqueue_del(struct timerqueue_head *head,
22 struct timerqueue_node *node);
23extern struct timerqueue_node *timerqueue_iterate_next(
24 struct timerqueue_node *node);
25
26/**
27 * timerqueue_getnext - Returns the timer with the earliest expiration time
28 *
29 * @head: head of timerqueue
30 *
31 * Returns a pointer to the timer node that has the
32 * earliest expiration time.
33 */
34static inline
35struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
36{
37 return head->next;
38}
39
40static inline void timerqueue_init(struct timerqueue_node *node)
41{
42 RB_CLEAR_NODE(&node->node);
43}
44
45static inline void timerqueue_init_head(struct timerqueue_head *head)
46{
47 head->head = RB_ROOT;
48 head->next = NULL;
49}
50#endif /* _LINUX_TIMERQUEUE_H */
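A hypothetical sketch of the new timerqueue API; the two nodes and the function are invented. The head caches the leftmost rbtree node, so the next expiry is an O(1) lookup:

static void timerqueue_example(void)
{
	struct timerqueue_head q;
	struct timerqueue_node a, b, *next;

	timerqueue_init_head(&q);
	timerqueue_init(&a);
	timerqueue_init(&b);

	a.expires = ktime_set(5, 0);	/* 5s */
	b.expires = ktime_set(1, 0);	/* 1s */
	timerqueue_add(&q, &a);
	timerqueue_add(&q, &b);

	next = timerqueue_getnext(&q);	/* &b, the earliest expiry */
	timerqueue_del(&q, next);	/* head->next is recomputed */
}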
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a4a90b6726ce..d3e4f87e95c0 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -106,6 +106,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
106 106
107#define TP_PROTO(args...) args 107#define TP_PROTO(args...) args
108#define TP_ARGS(args...) args 108#define TP_ARGS(args...) args
109#define TP_CONDITION(args...) args
109 110
110#ifdef CONFIG_TRACEPOINTS 111#ifdef CONFIG_TRACEPOINTS
111 112
@@ -119,12 +120,14 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
119 * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just 120 * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
120 * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto". 121 * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
121 */ 122 */
122#define __DO_TRACE(tp, proto, args) \
123#define __DO_TRACE(tp, proto, args, cond) \
123 do { \ 124 do { \
124 struct tracepoint_func *it_func_ptr; \ 125 struct tracepoint_func *it_func_ptr; \
125 void *it_func; \ 126 void *it_func; \
126 void *__data; \ 127 void *__data; \
127 \ 128 \
129 if (!(cond)) \
130 return; \
128 rcu_read_lock_sched_notrace(); \ 131 rcu_read_lock_sched_notrace(); \
129 it_func_ptr = rcu_dereference_sched((tp)->funcs); \ 132 it_func_ptr = rcu_dereference_sched((tp)->funcs); \
130 if (it_func_ptr) { \ 133 if (it_func_ptr) { \
@@ -142,7 +145,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
142 * not add unwanted padding between the beginning of the section and the 145 * not add unwanted padding between the beginning of the section and the
143 * structure. Force alignment to the same alignment as the section start. 146 * structure. Force alignment to the same alignment as the section start.
144 */ 147 */
145#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \
148#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
146 extern struct tracepoint __tracepoint_##name; \ 149 extern struct tracepoint __tracepoint_##name; \
147 static inline void trace_##name(proto) \ 150 static inline void trace_##name(proto) \
148 { \ 151 { \
@@ -151,7 +154,8 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
151do_trace: \ 154do_trace: \
152 __DO_TRACE(&__tracepoint_##name, \ 155 __DO_TRACE(&__tracepoint_##name, \
153 TP_PROTO(data_proto), \ 156 TP_PROTO(data_proto), \
154 TP_ARGS(data_args)); \
157 TP_ARGS(data_args), \
158 TP_CONDITION(cond)); \
155 } \ 159 } \
156 static inline int \ 160 static inline int \
157 register_trace_##name(void (*probe)(data_proto), void *data) \ 161 register_trace_##name(void (*probe)(data_proto), void *data) \
@@ -186,7 +190,7 @@ do_trace: \
186 EXPORT_SYMBOL(__tracepoint_##name) 190 EXPORT_SYMBOL(__tracepoint_##name)
187 191
188#else /* !CONFIG_TRACEPOINTS */ 192#else /* !CONFIG_TRACEPOINTS */
189#define __DECLARE_TRACE(name, proto, args, data_proto, data_args) \
193#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
190 static inline void trace_##name(proto) \ 194 static inline void trace_##name(proto) \
191 { } \ 195 { } \
192 static inline int \ 196 static inline int \
@@ -227,13 +231,20 @@ do_trace: \
227 * "void *__data, proto" as the callback prototype. 231 * "void *__data, proto" as the callback prototype.
228 */ 232 */
229#define DECLARE_TRACE_NOARGS(name) \ 233#define DECLARE_TRACE_NOARGS(name) \
230 __DECLARE_TRACE(name, void, , void *__data, __data)
234 __DECLARE_TRACE(name, void, , 1, void *__data, __data)
231 235
232#define DECLARE_TRACE(name, proto, args) \ 236#define DECLARE_TRACE(name, proto, args) \
233 __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
237 __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1, \
234 PARAMS(void *__data, proto), \ 238 PARAMS(void *__data, proto), \
235 PARAMS(__data, args)) 239 PARAMS(__data, args))
236 240
241#define DECLARE_TRACE_CONDITION(name, proto, args, cond) \
242 __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
243 PARAMS(void *__data, proto), \
244 PARAMS(__data, args))
245
246#define TRACE_EVENT_FLAGS(event, flag)
247
237#endif /* DECLARE_TRACE */ 248#endif /* DECLARE_TRACE */
238 249
239#ifndef TRACE_EVENT 250#ifndef TRACE_EVENT
@@ -347,11 +358,21 @@ do_trace: \
347 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 358 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
348#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 359#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
349 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 360 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
361#define DEFINE_EVENT_CONDITION(template, name, proto, \
362 args, cond) \
363 DECLARE_TRACE_CONDITION(name, PARAMS(proto), \
364 PARAMS(args), PARAMS(cond))
350 365
351#define TRACE_EVENT(name, proto, args, struct, assign, print) \ 366#define TRACE_EVENT(name, proto, args, struct, assign, print) \
352 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 367 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
353#define TRACE_EVENT_FN(name, proto, args, struct, \ 368#define TRACE_EVENT_FN(name, proto, args, struct, \
354 assign, print, reg, unreg) \ 369 assign, print, reg, unreg) \
355 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 370 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
371#define TRACE_EVENT_CONDITION(name, proto, args, cond, \
372 struct, assign, print) \
373 DECLARE_TRACE_CONDITION(name, PARAMS(proto), \
374 PARAMS(args), PARAMS(cond))
375
376#define TRACE_EVENT_FLAGS(event, flag)
356 377
357#endif /* ifdef TRACE_EVENT (see note above) */ 378#endif /* ifdef TRACE_EVENT (see note above) */
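What a conditional event definition looks like in practice; the event name and field below are invented. Because the condition is checked inside __DO_TRACE() before any probe callback runs, a false condition skips the TP_fast_assign() work entirely:

TRACE_EVENT_CONDITION(my_event,

	TP_PROTO(int value),

	TP_ARGS(value),

	TP_CONDITION(value != 0),

	TP_STRUCT__entry(
		__field(int, value)
	),

	TP_fast_assign(
		__entry->value = value;
	),

	TP_printk("value=%d", __entry->value)
);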
diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h
index 2498bb9fe002..c9a6abd972a1 100644
--- a/include/linux/unaligned/packed_struct.h
+++ b/include/linux/unaligned/packed_struct.h
@@ -3,9 +3,9 @@
3 3
4#include <linux/kernel.h> 4#include <linux/kernel.h>
5 5
6struct __una_u16 { u16 x __attribute__((packed)); };
7struct __una_u32 { u32 x __attribute__((packed)); };
8struct __una_u64 { u64 x __attribute__((packed)); };
6struct __una_u16 { u16 x; } __attribute__((packed));
7struct __una_u32 { u32 x; } __attribute__((packed));
8struct __una_u64 { u64 x; } __attribute__((packed));
9 9
10static inline u16 __get_unaligned_cpu16(const void *p) 10static inline u16 __get_unaligned_cpu16(const void *p)
11{ 11{
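Moving the attribute from the member to the struct tells the compiler the whole object may be arbitrarily aligned, so loads through it are generated safely for any address. A standalone demonstration of the technique:

#include <stdio.h>

struct una_u32 { unsigned int x; } __attribute__((packed));

static unsigned int get_unaligned_u32(const void *p)
{
	const struct una_u32 *ptr = p;
	return ptr->x;	/* compiler emits an alignment-safe load */
}

int main(void)
{
	unsigned char buf[8] = { 0, 0x78, 0x56, 0x34, 0x12 };

	/* odd address: a plain unsigned int * dereference here would be
	 * undefined and can fault on strict-alignment CPUs */
	printf("%#x\n", get_unaligned_u32(buf + 1));	/* 0x12345678 on LE */
	return 0;
}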
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0c0771f06bfa..bd257fee6031 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -127,12 +127,20 @@ struct execute_work {
127 .timer = TIMER_INITIALIZER(NULL, 0, 0), \ 127 .timer = TIMER_INITIALIZER(NULL, 0, 0), \
128 } 128 }
129 129
130#define __DEFERRED_WORK_INITIALIZER(n, f) { \
131 .work = __WORK_INITIALIZER((n).work, (f)), \
132 .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \
133 }
134
130#define DECLARE_WORK(n, f) \ 135#define DECLARE_WORK(n, f) \
131 struct work_struct n = __WORK_INITIALIZER(n, f) 136 struct work_struct n = __WORK_INITIALIZER(n, f)
132 137
133#define DECLARE_DELAYED_WORK(n, f) \ 138#define DECLARE_DELAYED_WORK(n, f) \
134 struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) 139 struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
135 140
141#define DECLARE_DEFERRED_WORK(n, f) \
142 struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f)
143
136/* 144/*
137 * initialize a work item's function pointer 145 * initialize a work item's function pointer
138 */ 146 */
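A hypothetical use of the new deferred-work variant; gc_work and cache_gc are invented names. The item behaves like any delayed work except that its timer will not pull a CPU out of idle:

static void cache_gc(struct work_struct *work);
static DECLARE_DEFERRED_WORK(gc_work, cache_gc);

static void cache_gc(struct work_struct *work)
{
	/* ... periodic, non-urgent reclaim ... */
	schedule_delayed_work(&gc_work, 10 * HZ);	/* re-arm */
}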
diff --git a/include/media/wm8775.h b/include/media/wm8775.h
index a1c4d417dfa2..60739c5a23ae 100644
--- a/include/media/wm8775.h
+++ b/include/media/wm8775.h
@@ -32,7 +32,4 @@
32#define WM8775_AIN3 4 32#define WM8775_AIN3 4
33#define WM8775_AIN4 8 33#define WM8775_AIN4 8
34 34
35/* subdev group ID */
36#define WM8775_GID (1 << 0)
37
38#endif 35#endif
diff --git a/include/net/flow.h b/include/net/flow.h
index 0ac3fb5e0973..bb08692a20b0 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -49,7 +49,6 @@ struct flowi {
49 __u8 proto; 49 __u8 proto;
50 __u8 flags; 50 __u8 flags;
51#define FLOWI_FLAG_ANYSRC 0x01 51#define FLOWI_FLAG_ANYSRC 0x01
52#define FLOWI_FLAG_MATCH_ANY_IIF 0x02
53 union { 52 union {
54 struct { 53 struct {
55 __be16 sport; 54 __be16 sport;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 278312c95f96..2ab926860cd8 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -164,5 +164,15 @@ static inline int ipv6_unicast_destination(struct sk_buff *skb)
164 return rt->rt6i_flags & RTF_LOCAL; 164 return rt->rt6i_flags & RTF_LOCAL;
165} 165}
166 166
167int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
168
169static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
170{
171 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
172
173 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
174 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
175}
176
167#endif 177#endif
168#endif 178#endif
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9fdf982d1286..365359b24177 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2024,8 +2024,8 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw,
2024 * 2024 *
2025 * This function may not be called in IRQ context. Calls to this function 2025 * This function may not be called in IRQ context. Calls to this function
2026 * for a single hardware must be synchronized against each other. Calls 2026 * for a single hardware must be synchronized against each other. Calls
2027 * to this function and ieee80211_tx_status_irqsafe() may not be mixed
2028 * for a single hardware.
2027 * to this function, ieee80211_tx_status_ni() and ieee80211_tx_status_irqsafe()
2028 * may not be mixed for a single hardware.
2029 * 2029 *
2030 * @hw: the hardware the frame was transmitted by 2030 * @hw: the hardware the frame was transmitted by
2031 * @skb: the frame that was transmitted, owned by mac80211 after this call 2031 * @skb: the frame that was transmitted, owned by mac80211 after this call
@@ -2034,13 +2034,33 @@ void ieee80211_tx_status(struct ieee80211_hw *hw,
2034 struct sk_buff *skb); 2034 struct sk_buff *skb);
2035 2035
2036/** 2036/**
2037 * ieee80211_tx_status_ni - transmit status callback (in process context)
2038 *
2039 * Like ieee80211_tx_status() but can be called in process context.
2040 *
2041 * Calls to this function, ieee80211_tx_status() and
2042 * ieee80211_tx_status_irqsafe() may not be mixed
2043 * for a single hardware.
2044 *
2045 * @hw: the hardware the frame was transmitted by
2046 * @skb: the frame that was transmitted, owned by mac80211 after this call
2047 */
2048static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw,
2049 struct sk_buff *skb)
2050{
2051 local_bh_disable();
2052 ieee80211_tx_status(hw, skb);
2053 local_bh_enable();
2054}
2055
2056/**
2037 * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback 2057 * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback
2038 * 2058 *
2039 * Like ieee80211_tx_status() but can be called in IRQ context 2059 * Like ieee80211_tx_status() but can be called in IRQ context
2040 * (internally defers to a tasklet.) 2060 * (internally defers to a tasklet.)
2041 * 2061 *
2042 * Calls to this function and ieee80211_tx_status() may not be mixed for a
2043 * single hardware.
2062 * Calls to this function, ieee80211_tx_status() and
2063 * ieee80211_tx_status_ni() may not be mixed for a single hardware.
2044 * 2064 *
2045 * @hw: the hardware the frame was transmitted by 2065 * @hw: the hardware the frame was transmitted by
2046 * @skb: the frame that was transmitted, owned by mac80211 after this call 2066 * @skb: the frame that was transmitted, owned by mac80211 after this call
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index dd3031aed9d5..9fcc680ab6b9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -323,7 +323,9 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
323static inline int tcf_valid_offset(const struct sk_buff *skb, 323static inline int tcf_valid_offset(const struct sk_buff *skb,
324 const unsigned char *ptr, const int len) 324 const unsigned char *ptr, const int len)
325{ 325{
326 return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head);
326 return likely((ptr + len) <= skb_tail_pointer(skb) &&
327 ptr >= skb->head &&
328 (ptr <= (ptr + len)));
327} 329}
328 330
329#ifdef CONFIG_NET_CLS_IND 331#ifdef CONFIG_NET_CLS_IND
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ea1f8a83160d..79f34e2b752f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -610,11 +610,7 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
610{ 610{
611 struct sk_buff *n; 611 struct sk_buff *n;
612 612
613 if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) &&
613 n = skb_clone(skb, gfp_mask);
614 !skb_shared(skb))
615 n = skb_get(skb);
616 else
617 n = skb_clone(skb, gfp_mask);
618 614
619 if (n) { 615 if (n) {
620 n->tc_verd = SET_TC_VERD(n->tc_verd, 0); 616 n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
diff --git a/include/net/sock.h b/include/net/sock.h
index 659d968d95c5..7d3f7ce239b5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -754,6 +754,7 @@ struct proto {
754 void (*unhash)(struct sock *sk); 754 void (*unhash)(struct sock *sk);
755 void (*rehash)(struct sock *sk); 755 void (*rehash)(struct sock *sk);
756 int (*get_port)(struct sock *sk, unsigned short snum); 756 int (*get_port)(struct sock *sk, unsigned short snum);
757 void (*clear_sk)(struct sock *sk, int size);
757 758
758 /* Keeping track of sockets in use */ 759 /* Keeping track of sockets in use */
759#ifdef CONFIG_PROC_FS 760#ifdef CONFIG_PROC_FS
@@ -852,6 +853,8 @@ static inline void __sk_prot_rehash(struct sock *sk)
852 sk->sk_prot->hash(sk); 853 sk->sk_prot->hash(sk);
853} 854}
854 855
856void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
857
855/* About 10 seconds */ 858/* About 10 seconds */
856#define SOCK_DESTROY_TIME (10*HZ) 859#define SOCK_DESTROY_TIME (10*HZ)
857 860
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 1dfab5401511..b0b4eb24d592 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -26,6 +26,15 @@
26#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ 26#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
27 DEFINE_TRACE(name) 27 DEFINE_TRACE(name)
28 28
29#undef TRACE_EVENT_CONDITION
30#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
31 TRACE_EVENT(name, \
32 PARAMS(proto), \
33 PARAMS(args), \
34 PARAMS(tstruct), \
35 PARAMS(assign), \
36 PARAMS(print))
37
29#undef TRACE_EVENT_FN 38#undef TRACE_EVENT_FN
30#define TRACE_EVENT_FN(name, proto, args, tstruct, \ 39#define TRACE_EVENT_FN(name, proto, args, tstruct, \
31 assign, print, reg, unreg) \ 40 assign, print, reg, unreg) \
@@ -39,6 +48,10 @@
39#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 48#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
40 DEFINE_TRACE(name) 49 DEFINE_TRACE(name)
41 50
51#undef DEFINE_EVENT_CONDITION
52#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
53 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
54
42#undef DECLARE_TRACE 55#undef DECLARE_TRACE
43#define DECLARE_TRACE(name, proto, args) \ 56#define DECLARE_TRACE(name, proto, args) \
44 DEFINE_TRACE(name) 57 DEFINE_TRACE(name)
@@ -75,9 +88,11 @@
75 88
76#undef TRACE_EVENT 89#undef TRACE_EVENT
77#undef TRACE_EVENT_FN 90#undef TRACE_EVENT_FN
91#undef TRACE_EVENT_CONDITION
78#undef DECLARE_EVENT_CLASS 92#undef DECLARE_EVENT_CLASS
79#undef DEFINE_EVENT 93#undef DEFINE_EVENT
80#undef DEFINE_EVENT_PRINT 94#undef DEFINE_EVENT_PRINT
95#undef DEFINE_EVENT_CONDITION
81#undef TRACE_HEADER_MULTI_READ 96#undef TRACE_HEADER_MULTI_READ
82#undef DECLARE_TRACE 97#undef DECLARE_TRACE
83 98
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 286784d69b8f..1bcc2a8c00e2 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -7,16 +7,67 @@
7#include <linux/ktime.h> 7#include <linux/ktime.h>
8#include <linux/tracepoint.h> 8#include <linux/tracepoint.h>
9 9
10#ifndef _TRACE_POWER_ENUM_
11#define _TRACE_POWER_ENUM_
12enum {
13 POWER_NONE = 0,
14 POWER_CSTATE = 1, /* C-State */
15 POWER_PSTATE = 2, /* Frequency change or DVFS */
16 POWER_SSTATE = 3, /* Suspend */
17};
10DECLARE_EVENT_CLASS(cpu,
11
12 TP_PROTO(unsigned int state, unsigned int cpu_id),
13
14 TP_ARGS(state, cpu_id),
15
16 TP_STRUCT__entry(
17 __field( u32, state )
18 __field( u32, cpu_id )
19 ),
20
21 TP_fast_assign(
22 __entry->state = state;
23 __entry->cpu_id = cpu_id;
24 ),
25
26 TP_printk("state=%lu cpu_id=%lu", (unsigned long)__entry->state,
27 (unsigned long)__entry->cpu_id)
28);
29
30DEFINE_EVENT(cpu, cpu_idle,
31
32 TP_PROTO(unsigned int state, unsigned int cpu_id),
33
34 TP_ARGS(state, cpu_id)
35);
36
37/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
38#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
39#define _PWR_EVENT_AVOID_DOUBLE_DEFINING
40
41#define PWR_EVENT_EXIT -1
18#endif 42#endif
19 43
44DEFINE_EVENT(cpu, cpu_frequency,
45
46 TP_PROTO(unsigned int frequency, unsigned int cpu_id),
47
48 TP_ARGS(frequency, cpu_id)
49);
50
51TRACE_EVENT(machine_suspend,
52
53 TP_PROTO(unsigned int state),
54
55 TP_ARGS(state),
56
57 TP_STRUCT__entry(
58 __field( u32, state )
59 ),
60
61 TP_fast_assign(
62 __entry->state = state;
63 ),
64
65 TP_printk("state=%lu", (unsigned long)__entry->state)
66);
67
68/* This code will be removed after deprecation time exceeded (2.6.41) */
69#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED
70
20/* 71/*
21 * The power events are used for cpuidle & suspend (power_start, power_end) 72 * The power events are used for cpuidle & suspend (power_start, power_end)
22 * and for cpufreq (power_frequency) 73 * and for cpufreq (power_frequency)
@@ -75,6 +126,36 @@ TRACE_EVENT(power_end,
75 126
76); 127);
77 128
129/* Deprecated dummy functions must be protected against multi-declaration */
130#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
131#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
132
133enum {
134 POWER_NONE = 0,
135 POWER_CSTATE = 1,
136 POWER_PSTATE = 2,
137};
138#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
139
140#else /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */
141
142#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
143#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
144enum {
145 POWER_NONE = 0,
146 POWER_CSTATE = 1,
147 POWER_PSTATE = 2,
148};
149
150/* These dummy declarations have to be ripped out when the deprecated
151 events get removed */
152static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {};
153static inline void trace_power_end(u64 cpuid) {};
154static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {};
155#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
156
157#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */
158
78/* 159/*
79 * The clock events are used for clock enable/disable and for 160 * The clock events are used for clock enable/disable and for
80 * clock rate change 161 * clock rate change
@@ -153,7 +234,6 @@ DEFINE_EVENT(power_domain, power_domain_target,
153 234
154 TP_ARGS(name, state, cpu_id) 235 TP_ARGS(name, state, cpu_id)
155); 236);
156
157#endif /* _TRACE_POWER_H */ 237#endif /* _TRACE_POWER_H */
158 238
159/* This part must be outside protection */ 239/* This part must be outside protection */
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index fb726ac7caee..5a4c04a75b3d 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -40,6 +40,8 @@ TRACE_EVENT_FN(sys_enter,
40 syscall_regfunc, syscall_unregfunc 40 syscall_regfunc, syscall_unregfunc
41); 41);
42 42
43TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
44
43TRACE_EVENT_FN(sys_exit, 45TRACE_EVENT_FN(sys_exit,
44 46
45 TP_PROTO(struct pt_regs *regs, long ret), 47 TP_PROTO(struct pt_regs *regs, long ret),
@@ -62,6 +64,8 @@ TRACE_EVENT_FN(sys_exit,
62 syscall_regfunc, syscall_unregfunc 64 syscall_regfunc, syscall_unregfunc
63); 65);
64 66
67TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY)
68
65#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ 69#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
66 70
67#endif /* _TRACE_EVENTS_SYSCALLS_H */ 71#endif /* _TRACE_EVENTS_SYSCALLS_H */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a9377c0083ad..e16610c208c9 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -82,6 +82,10 @@
82 TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \ 82 TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \
83 PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \ 83 PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \
84 84
85#undef TRACE_EVENT_FLAGS
86#define TRACE_EVENT_FLAGS(name, value) \
87 __TRACE_EVENT_FLAGS(name, value)
88
85#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 89#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
86 90
87 91
@@ -129,6 +133,9 @@
129#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 133#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
130 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) 134 DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
131 135
136#undef TRACE_EVENT_FLAGS
137#define TRACE_EVENT_FLAGS(event, flag)
138
132#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 139#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
133 140
134/* 141/*
@@ -289,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \
289 296
290#undef __array 297#undef __array
291#define __array(type, item, len) \ 298#define __array(type, item, len) \
292 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
293 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
299 do { \
300 mutex_lock(&event_storage_mutex); \
301 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
302 snprintf(event_storage, sizeof(event_storage), \
303 "%s[%d]", #type, len); \
304 ret = trace_define_field(event_call, event_storage, #item, \
294 offsetof(typeof(field), item), \ 305 offsetof(typeof(field), item), \
295 sizeof(field.item), \ 306 sizeof(field.item), \
296 is_signed_type(type), FILTER_OTHER); \ 307 is_signed_type(type), FILTER_OTHER); \
297 if (ret) \
298 return ret;
308 mutex_unlock(&event_storage_mutex); \
309 if (ret) \
310 return ret; \
311 } while (0);
299 312
300#undef __dynamic_array 313#undef __dynamic_array
301#define __dynamic_array(type, item, len) \ 314#define __dynamic_array(type, item, len) \
diff --git a/init/Kconfig b/init/Kconfig
index c9728992a776..8dfd094e6875 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -393,7 +393,6 @@ config PREEMPT_RCU
393 393
394config RCU_TRACE 394config RCU_TRACE
395 bool "Enable tracing for RCU" 395 bool "Enable tracing for RCU"
396 depends on TREE_RCU || TREE_PREEMPT_RCU
397 help 396 help
398 This option provides tracing in RCU which presents stats 397 This option provides tracing in RCU which presents stats
399 in debugfs for debugging RCU implementation. 398 in debugfs for debugging RCU implementation.
@@ -459,6 +458,60 @@ config TREE_RCU_TRACE
459 TREE_PREEMPT_RCU implementations, permitting Makefile to 458 TREE_PREEMPT_RCU implementations, permitting Makefile to
460 trivially select kernel/rcutree_trace.c. 459 trivially select kernel/rcutree_trace.c.
461 460
461config RCU_BOOST
462 bool "Enable RCU priority boosting"
463 depends on RT_MUTEXES && TINY_PREEMPT_RCU
464 default n
465 help
466 This option boosts the priority of preempted RCU readers that
467 block the current preemptible RCU grace period for too long.
468 This option also prevents heavy loads from blocking RCU
469 callback invocation for all flavors of RCU.
470
471 Say Y here if you are working with real-time apps or heavy loads.
472 Say N here if you are unsure.
473
474config RCU_BOOST_PRIO
475 int "Real-time priority to boost RCU readers to"
476 range 1 99
477 depends on RCU_BOOST
478 default 1
479 help
480 This option specifies the real-time priority to which preempted
481 RCU readers are to be boosted. If you are working with CPU-bound
482 real-time applications, you should specify a priority higher then
483 the highest-priority CPU-bound application.
484
485 Specify the real-time priority, or take the default if unsure.
486
487config RCU_BOOST_DELAY
488 int "Milliseconds to delay boosting after RCU grace-period start"
489 range 0 3000
490 depends on RCU_BOOST
491 default 500
492 help
493 This option specifies the time to wait after the beginning of
494 a given grace period before priority-boosting preempted RCU
495 readers blocking that grace period. Note that any RCU reader
496 blocking an expedited RCU grace period is boosted immediately.
497
498 Accept the default if unsure.
499
500config SRCU_SYNCHRONIZE_DELAY
501 int "Microseconds to delay before waiting for readers"
502 range 0 20
503 default 10
504 help
505 This option controls how long SRCU delays before entering its
506 loop waiting on SRCU readers. The purpose of this loop is
507 to avoid the unconditional context-switch penalty that would
508	  otherwise be incurred if there were an active SRCU reader,
509 in a manner similar to adaptive locking schemes. This should
510 be set to be a bit longer than the common-case SRCU read-side
511 critical-section overhead.
512
513 Accept the default if unsure.
514
462endmenu # "RCU Subsystem" 515endmenu # "RCU Subsystem"
463 516
464config IKCONFIG 517config IKCONFIG
@@ -741,6 +794,19 @@ config NET_NS
741 794
742endif # NAMESPACES 795endif # NAMESPACES
743 796
797config SCHED_AUTOGROUP
798 bool "Automatic process group scheduling"
799 select EVENTFD
800 select CGROUPS
801 select CGROUP_SCHED
802 select FAIR_GROUP_SCHED
803 help
804 This option optimizes the scheduler for common desktop workloads by
805 automatically creating and populating task groups. This separation
806 of workloads isolates aggressive CPU burners (like build jobs) from
807 desktop applications. Task group autogeneration is currently based
808 upon task session.
809
744config MM_OWNER 810config MM_OWNER
745 bool 811 bool
746 812
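SCHED_AUTOGROUP above keys task groups off the session, so the effect is visible from userspace: a launcher that calls setsid() before exec'ing a CPU-heavy job places that job in its own autogroup, and the whole job then competes with the desktop as one scheduling entity. A minimal sketch, assuming CONFIG_SCHED_AUTOGROUP=y and "make -j8" as the heavy workload:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* New session => new autogroup (when autogroup is enabled).
         * setsid() fails if we are already a process-group leader,
         * which is harmless for this sketch. */
        if (setsid() == (pid_t)-1)
                perror("setsid");

        printf("session %d: build runs as one scheduling entity\n",
               (int)getsid(0));

        execlp("make", "make", "-j8", (char *)NULL);
        perror("execlp");       /* only reached if exec failed */
        return 1;
}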
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 830aaec9c7d5..2b54bef33b55 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -93,7 +93,7 @@ no_match:
93 * 93 *
94 * Returns the matching dev_t on success or 0 on failure. 94 * Returns the matching dev_t on success or 0 on failure.
95 */ 95 */
96static dev_t __init devt_from_partuuid(char *uuid_str) 96static dev_t devt_from_partuuid(char *uuid_str)
97{ 97{
98 dev_t res = 0; 98 dev_t res = 0;
99 struct device *dev = NULL; 99 struct device *dev = NULL;
diff --git a/init/main.c b/init/main.c
index 8646401f7a0e..ea51770c0170 100644
--- a/init/main.c
+++ b/init/main.c
@@ -67,6 +67,7 @@
67#include <linux/sfi.h> 67#include <linux/sfi.h>
68#include <linux/shmem_fs.h> 68#include <linux/shmem_fs.h>
69#include <linux/slab.h> 69#include <linux/slab.h>
70#include <linux/perf_event.h>
70 71
71#include <asm/io.h> 72#include <asm/io.h>
72#include <asm/bugs.h> 73#include <asm/bugs.h>
@@ -603,6 +604,8 @@ asmlinkage void __init start_kernel(void)
603 "enabled *very* early, fixing it\n"); 604 "enabled *very* early, fixing it\n");
604 local_irq_disable(); 605 local_irq_disable();
605 } 606 }
607 idr_init_cache();
608 perf_event_init();
606 rcu_init(); 609 rcu_init();
607 radix_tree_init(); 610 radix_tree_init();
608 /* init some links before init_ISA_irqs() */ 611 /* init some links before init_ISA_irqs() */
@@ -658,7 +661,6 @@ asmlinkage void __init start_kernel(void)
658 enable_debug_pagealloc(); 661 enable_debug_pagealloc();
659 kmemleak_init(); 662 kmemleak_init();
660 debug_objects_mem_init(); 663 debug_objects_mem_init();
661 idr_init_cache();
662 setup_per_cpu_pageset(); 664 setup_per_cpu_pageset();
663 numa_policy_init(); 665 numa_policy_init();
664 if (late_time_init) 666 if (late_time_init)
@@ -882,6 +884,7 @@ static int __init kernel_init(void * unused)
882 smp_prepare_cpus(setup_max_cpus); 884 smp_prepare_cpus(setup_max_cpus);
883 885
884 do_pre_smp_initcalls(); 886 do_pre_smp_initcalls();
887 lockup_detector_init();
885 888
886 smp_init(); 889 smp_init();
887 sched_init_smp(); 890 sched_init_smp();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f6e726f18491..cb7a1efa9c2b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu)
189} 189}
190 190
191struct take_cpu_down_param { 191struct take_cpu_down_param {
192 struct task_struct *caller;
193 unsigned long mod; 192 unsigned long mod;
194 void *hcpu; 193 void *hcpu;
195}; 194};
@@ -198,7 +197,6 @@ struct take_cpu_down_param {
198static int __ref take_cpu_down(void *_param) 197static int __ref take_cpu_down(void *_param)
199{ 198{
200 struct take_cpu_down_param *param = _param; 199 struct take_cpu_down_param *param = _param;
201 unsigned int cpu = (unsigned long)param->hcpu;
202 int err; 200 int err;
203 201
204 /* Ensure this CPU doesn't handle any more interrupts. */ 202 /* Ensure this CPU doesn't handle any more interrupts. */
@@ -208,11 +206,6 @@ static int __ref take_cpu_down(void *_param)
208 206
209 cpu_notify(CPU_DYING | param->mod, param->hcpu); 207 cpu_notify(CPU_DYING | param->mod, param->hcpu);
210 208
211 if (task_cpu(param->caller) == cpu)
212 move_task_off_dead_cpu(cpu, param->caller);
213 /* Force idle task to run as soon as we yield: it should
214 immediately notice cpu is offline and die quickly. */
215 sched_idle_next();
216 return 0; 209 return 0;
217} 210}
218 211
@@ -223,7 +216,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
223 void *hcpu = (void *)(long)cpu; 216 void *hcpu = (void *)(long)cpu;
224 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; 217 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
225 struct take_cpu_down_param tcd_param = { 218 struct take_cpu_down_param tcd_param = {
226 .caller = current,
227 .mod = mod, 219 .mod = mod,
228 .hcpu = hcpu, 220 .hcpu = hcpu,
229 }; 221 };
@@ -253,9 +245,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
253 } 245 }
254 BUG_ON(cpu_online(cpu)); 246 BUG_ON(cpu_online(cpu));
255 247
256 /* Wait for it to sleep (leaving idle task). */ 248 /*
249 * The migration_call() CPU_DYING callback will have removed all
 250 * runnable tasks from the cpu; only the idle task is left now
251 * that the migration thread is done doing the stop_machine thing.
252 *
253 * Wait for the stop thread to go away.
254 */
257 while (!idle_cpu(cpu)) 255 while (!idle_cpu(cpu))
258 yield(); 256 cpu_relax();
259 257
260 /* This actually kills the CPU. */ 258 /* This actually kills the CPU. */
261 __cpu_die(cpu); 259 __cpu_die(cpu);
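The yield() -> cpu_relax() switch above matters because, once the CPU_DYING callback has migrated everything off the dying CPU, there is nothing left there worth yielding to; the waiter just needs a polite busy-wait. A hedged userspace sketch of that loop shape — the pause intrinsic is x86-specific, and all names here are illustrative:

#include <stdatomic.h>
#include <stdio.h>

/* A "relax" hint eases pipeline/SMT pressure while spinning; unlike
 * the replaced yield(), it never invokes the scheduler. */
static inline void cpu_relax_hint(void)
{
#if defined(__x86_64__) || defined(__i386__)
        __builtin_ia32_pause();
#else
        __asm__ __volatile__("" ::: "memory");  /* fallback: barrier only */
#endif
}

static atomic_int cpu_idle_flag;        /* stand-in for idle_cpu(cpu) */

int main(void)
{
        atomic_store(&cpu_idle_flag, 1);        /* pretend the CPU went idle */

        while (!atomic_load(&cpu_idle_flag))
                cpu_relax_hint();               /* busy-wait, no yield */

        puts("only the idle task remains; safe to __cpu_die()");
        return 0;
}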
diff --git a/kernel/fork.c b/kernel/fork.c
index 5447dc7defa9..7d164e25b0f0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig)
174 174
175static inline void put_signal_struct(struct signal_struct *sig) 175static inline void put_signal_struct(struct signal_struct *sig)
176{ 176{
177 if (atomic_dec_and_test(&sig->sigcnt)) 177 if (atomic_dec_and_test(&sig->sigcnt)) {
178 sched_autogroup_exit(sig);
178 free_signal_struct(sig); 179 free_signal_struct(sig);
180 }
179} 181}
180 182
181void __put_task_struct(struct task_struct *tsk) 183void __put_task_struct(struct task_struct *tsk)
@@ -905,6 +907,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
905 posix_cpu_timers_init_group(sig); 907 posix_cpu_timers_init_group(sig);
906 908
907 tty_audit_fork(sig); 909 tty_audit_fork(sig);
910 sched_autogroup_fork(sig);
908 911
909 sig->oom_adj = current->signal->oom_adj; 912 sig->oom_adj = current->signal->oom_adj;
910 sig->oom_score_adj = current->signal->oom_score_adj; 913 sig->oom_score_adj = current->signal->oom_score_adj;
@@ -1315,7 +1318,7 @@ bad_fork_cleanup_mm:
1315 } 1318 }
1316bad_fork_cleanup_signal: 1319bad_fork_cleanup_signal:
1317 if (!(clone_flags & CLONE_THREAD)) 1320 if (!(clone_flags & CLONE_THREAD))
1318 free_signal_struct(p->signal); 1321 put_signal_struct(p->signal);
1319bad_fork_cleanup_sighand: 1322bad_fork_cleanup_sighand:
1320 __cleanup_sighand(p->sighand); 1323 __cleanup_sighand(p->sighand);
1321bad_fork_cleanup_fs: 1324bad_fork_cleanup_fs:
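Both fork.c hunks above are about reference discipline: teardown work (sched_autogroup_exit) hooks the drop-to-zero path, and the error path must put a reference rather than free outright, since a concurrent holder may still exist. A minimal userspace sketch of that put-vs-free distinction, with illustrative names only:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sig {
        atomic_int cnt;
};

static void sig_free(struct sig *s)
{
        puts("last ref gone, freeing");
        free(s);
}

static void sig_put(struct sig *s)
{
        /* free only when the count actually reaches zero */
        if (atomic_fetch_sub(&s->cnt, 1) == 1)
                sig_free(s);
}

int main(void)
{
        struct sig *s = malloc(sizeof(*s));

        atomic_init(&s->cnt, 2);        /* two holders */
        sig_put(s);                     /* error path: must not free yet */
        sig_put(s);                     /* real last user frees */
        return 0;
}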
diff --git a/kernel/futex.c b/kernel/futex.c
index 40a8777a27d0..3019b92e6917 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -69,6 +69,14 @@ int __read_mostly futex_cmpxchg_enabled;
69#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) 69#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
70 70
71/* 71/*
72 * Futex flags used to encode options to functions and preserve them across
73 * restarts.
74 */
75#define FLAGS_SHARED 0x01
76#define FLAGS_CLOCKRT 0x02
77#define FLAGS_HAS_TIMEOUT 0x04
78
79/*
72 * Priority Inheritance state: 80 * Priority Inheritance state:
73 */ 81 */
74struct futex_pi_state { 82struct futex_pi_state {
@@ -123,6 +131,12 @@ struct futex_q {
123 u32 bitset; 131 u32 bitset;
124}; 132};
125 133
134static const struct futex_q futex_q_init = {
 135 /* list gets initialized in queue_me() */
136 .key = FUTEX_KEY_INIT,
137 .bitset = FUTEX_BITSET_MATCH_ANY
138};
139
126/* 140/*
127 * Hash buckets are shared by all the futex_keys that hash to the same 141 * Hash buckets are shared by all the futex_keys that hash to the same
128 * location. Each key may have multiple futex_q structures, one for each task 142 * location. Each key may have multiple futex_q structures, one for each task
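futex_q_init above replaces scattered per-field setup (the q.pi_state = NULL, q.rt_waiter = NULL, ... assignments removed later in this diff) with one const template that designated initializers complete with zeros. A small sketch of the idiom, names illustrative:

#include <stdio.h>

struct waiter {
        void *pi_state;
        void *rt_waiter;
        unsigned int bitset;
};

/* One fully-initialized template; unnamed fields default to zero/NULL. */
static const struct waiter waiter_init = {
        .bitset = 0xffffffffu,          /* match-any default */
};

int main(void)
{
        struct waiter q = waiter_init;  /* every field starts known-good */

        printf("bitset=%#x pi_state=%p\n", q.bitset, q.pi_state);
        return 0;
}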
@@ -283,8 +297,7 @@ again:
283 return 0; 297 return 0;
284} 298}
285 299
286static inline 300static inline void put_futex_key(union futex_key *key)
287void put_futex_key(int fshared, union futex_key *key)
288{ 301{
289 drop_futex_key_refs(key); 302 drop_futex_key_refs(key);
290} 303}
@@ -870,7 +883,8 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
870/* 883/*
871 * Wake up waiters matching bitset queued on this futex (uaddr). 884 * Wake up waiters matching bitset queued on this futex (uaddr).
872 */ 885 */
873static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) 886static int
887futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
874{ 888{
875 struct futex_hash_bucket *hb; 889 struct futex_hash_bucket *hb;
876 struct futex_q *this, *next; 890 struct futex_q *this, *next;
@@ -881,7 +895,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
881 if (!bitset) 895 if (!bitset)
882 return -EINVAL; 896 return -EINVAL;
883 897
884 ret = get_futex_key(uaddr, fshared, &key); 898 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
885 if (unlikely(ret != 0)) 899 if (unlikely(ret != 0))
886 goto out; 900 goto out;
887 901
@@ -907,7 +921,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
907 } 921 }
908 922
909 spin_unlock(&hb->lock); 923 spin_unlock(&hb->lock);
910 put_futex_key(fshared, &key); 924 put_futex_key(&key);
911out: 925out:
912 return ret; 926 return ret;
913} 927}
@@ -917,7 +931,7 @@ out:
917 * to this virtual address: 931 * to this virtual address:
918 */ 932 */
919static int 933static int
920futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, 934futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
921 int nr_wake, int nr_wake2, int op) 935 int nr_wake, int nr_wake2, int op)
922{ 936{
923 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; 937 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
@@ -927,10 +941,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
927 int ret, op_ret; 941 int ret, op_ret;
928 942
929retry: 943retry:
930 ret = get_futex_key(uaddr1, fshared, &key1); 944 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
931 if (unlikely(ret != 0)) 945 if (unlikely(ret != 0))
932 goto out; 946 goto out;
933 ret = get_futex_key(uaddr2, fshared, &key2); 947 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
934 if (unlikely(ret != 0)) 948 if (unlikely(ret != 0))
935 goto out_put_key1; 949 goto out_put_key1;
936 950
@@ -962,11 +976,11 @@ retry_private:
962 if (ret) 976 if (ret)
963 goto out_put_keys; 977 goto out_put_keys;
964 978
965 if (!fshared) 979 if (!(flags & FLAGS_SHARED))
966 goto retry_private; 980 goto retry_private;
967 981
968 put_futex_key(fshared, &key2); 982 put_futex_key(&key2);
969 put_futex_key(fshared, &key1); 983 put_futex_key(&key1);
970 goto retry; 984 goto retry;
971 } 985 }
972 986
@@ -996,9 +1010,9 @@ retry_private:
996 1010
997 double_unlock_hb(hb1, hb2); 1011 double_unlock_hb(hb1, hb2);
998out_put_keys: 1012out_put_keys:
999 put_futex_key(fshared, &key2); 1013 put_futex_key(&key2);
1000out_put_key1: 1014out_put_key1:
1001 put_futex_key(fshared, &key1); 1015 put_futex_key(&key1);
1002out: 1016out:
1003 return ret; 1017 return ret;
1004} 1018}
@@ -1133,13 +1147,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1133/** 1147/**
1134 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 1148 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
1135 * @uaddr1: source futex user address 1149 * @uaddr1: source futex user address
1136 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED 1150 * @flags: futex flags (FLAGS_SHARED, etc.)
1137 * @uaddr2: target futex user address 1151 * @uaddr2: target futex user address
1138 * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) 1152 * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
1139 * @nr_requeue: number of waiters to requeue (0-INT_MAX) 1153 * @nr_requeue: number of waiters to requeue (0-INT_MAX)
1140 * @cmpval: @uaddr1 expected value (or %NULL) 1154 * @cmpval: @uaddr1 expected value (or %NULL)
1141 * @requeue_pi: if we are attempting to requeue from a non-pi futex to a 1155 * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
1142 * pi futex (pi to pi requeue is not supported) 1156 * pi futex (pi to pi requeue is not supported)
1143 * 1157 *
1144 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire 1158 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
1145 * uaddr2 atomically on behalf of the top waiter. 1159 * uaddr2 atomically on behalf of the top waiter.
@@ -1148,9 +1162,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1148 * >=0 - on success, the number of tasks requeued or woken 1162 * >=0 - on success, the number of tasks requeued or woken
1149 * <0 - on error 1163 * <0 - on error
1150 */ 1164 */
1151static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, 1165static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1152 int nr_wake, int nr_requeue, u32 *cmpval, 1166 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1153 int requeue_pi) 1167 u32 *cmpval, int requeue_pi)
1154{ 1168{
1155 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; 1169 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1156 int drop_count = 0, task_count = 0, ret; 1170 int drop_count = 0, task_count = 0, ret;
@@ -1191,10 +1205,10 @@ retry:
1191 pi_state = NULL; 1205 pi_state = NULL;
1192 } 1206 }
1193 1207
1194 ret = get_futex_key(uaddr1, fshared, &key1); 1208 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
1195 if (unlikely(ret != 0)) 1209 if (unlikely(ret != 0))
1196 goto out; 1210 goto out;
1197 ret = get_futex_key(uaddr2, fshared, &key2); 1211 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
1198 if (unlikely(ret != 0)) 1212 if (unlikely(ret != 0))
1199 goto out_put_key1; 1213 goto out_put_key1;
1200 1214
@@ -1216,11 +1230,11 @@ retry_private:
1216 if (ret) 1230 if (ret)
1217 goto out_put_keys; 1231 goto out_put_keys;
1218 1232
1219 if (!fshared) 1233 if (!(flags & FLAGS_SHARED))
1220 goto retry_private; 1234 goto retry_private;
1221 1235
1222 put_futex_key(fshared, &key2); 1236 put_futex_key(&key2);
1223 put_futex_key(fshared, &key1); 1237 put_futex_key(&key1);
1224 goto retry; 1238 goto retry;
1225 } 1239 }
1226 if (curval != *cmpval) { 1240 if (curval != *cmpval) {
@@ -1260,8 +1274,8 @@ retry_private:
1260 break; 1274 break;
1261 case -EFAULT: 1275 case -EFAULT:
1262 double_unlock_hb(hb1, hb2); 1276 double_unlock_hb(hb1, hb2);
1263 put_futex_key(fshared, &key2); 1277 put_futex_key(&key2);
1264 put_futex_key(fshared, &key1); 1278 put_futex_key(&key1);
1265 ret = fault_in_user_writeable(uaddr2); 1279 ret = fault_in_user_writeable(uaddr2);
1266 if (!ret) 1280 if (!ret)
1267 goto retry; 1281 goto retry;
@@ -1269,8 +1283,8 @@ retry_private:
1269 case -EAGAIN: 1283 case -EAGAIN:
1270 /* The owner was exiting, try again. */ 1284 /* The owner was exiting, try again. */
1271 double_unlock_hb(hb1, hb2); 1285 double_unlock_hb(hb1, hb2);
1272 put_futex_key(fshared, &key2); 1286 put_futex_key(&key2);
1273 put_futex_key(fshared, &key1); 1287 put_futex_key(&key1);
1274 cond_resched(); 1288 cond_resched();
1275 goto retry; 1289 goto retry;
1276 default: 1290 default:
@@ -1352,9 +1366,9 @@ out_unlock:
1352 drop_futex_key_refs(&key1); 1366 drop_futex_key_refs(&key1);
1353 1367
1354out_put_keys: 1368out_put_keys:
1355 put_futex_key(fshared, &key2); 1369 put_futex_key(&key2);
1356out_put_key1: 1370out_put_key1:
1357 put_futex_key(fshared, &key1); 1371 put_futex_key(&key1);
1358out: 1372out:
1359 if (pi_state != NULL) 1373 if (pi_state != NULL)
1360 free_pi_state(pi_state); 1374 free_pi_state(pi_state);
@@ -1494,7 +1508,7 @@ static void unqueue_me_pi(struct futex_q *q)
1494 * private futexes. 1508 * private futexes.
1495 */ 1509 */
1496static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1510static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1497 struct task_struct *newowner, int fshared) 1511 struct task_struct *newowner)
1498{ 1512{
1499 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; 1513 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1500 struct futex_pi_state *pi_state = q->pi_state; 1514 struct futex_pi_state *pi_state = q->pi_state;
@@ -1587,20 +1601,11 @@ handle_fault:
1587 goto retry; 1601 goto retry;
1588} 1602}
1589 1603
1590/*
1591 * In case we must use restart_block to restart a futex_wait,
1592 * we encode in the 'flags' shared capability
1593 */
1594#define FLAGS_SHARED 0x01
1595#define FLAGS_CLOCKRT 0x02
1596#define FLAGS_HAS_TIMEOUT 0x04
1597
1598static long futex_wait_restart(struct restart_block *restart); 1604static long futex_wait_restart(struct restart_block *restart);
1599 1605
1600/** 1606/**
1601 * fixup_owner() - Post lock pi_state and corner case management 1607 * fixup_owner() - Post lock pi_state and corner case management
1602 * @uaddr: user address of the futex 1608 * @uaddr: user address of the futex
1603 * @fshared: whether the futex is shared (1) or not (0)
1604 * @q: futex_q (contains pi_state and access to the rt_mutex) 1609 * @q: futex_q (contains pi_state and access to the rt_mutex)
1605 * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) 1610 * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
1606 * 1611 *
@@ -1613,8 +1618,7 @@ static long futex_wait_restart(struct restart_block *restart);
1613 * 0 - success, lock not taken 1618 * 0 - success, lock not taken
1614 * <0 - on error (-EFAULT) 1619 * <0 - on error (-EFAULT)
1615 */ 1620 */
1616static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, 1621static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1617 int locked)
1618{ 1622{
1619 struct task_struct *owner; 1623 struct task_struct *owner;
1620 int ret = 0; 1624 int ret = 0;
@@ -1625,7 +1629,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
1625 * did a lock-steal - fix up the PI-state in that case: 1629 * did a lock-steal - fix up the PI-state in that case:
1626 */ 1630 */
1627 if (q->pi_state->owner != current) 1631 if (q->pi_state->owner != current)
1628 ret = fixup_pi_state_owner(uaddr, q, current, fshared); 1632 ret = fixup_pi_state_owner(uaddr, q, current);
1629 goto out; 1633 goto out;
1630 } 1634 }
1631 1635
@@ -1652,7 +1656,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
1652 * lock. Fix the state up. 1656 * lock. Fix the state up.
1653 */ 1657 */
1654 owner = rt_mutex_owner(&q->pi_state->pi_mutex); 1658 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1655 ret = fixup_pi_state_owner(uaddr, q, owner, fshared); 1659 ret = fixup_pi_state_owner(uaddr, q, owner);
1656 goto out; 1660 goto out;
1657 } 1661 }
1658 1662
@@ -1715,7 +1719,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1715 * futex_wait_setup() - Prepare to wait on a futex 1719 * futex_wait_setup() - Prepare to wait on a futex
1716 * @uaddr: the futex userspace address 1720 * @uaddr: the futex userspace address
1717 * @val: the expected value 1721 * @val: the expected value
1718 * @fshared: whether the futex is shared (1) or not (0) 1722 * @flags: futex flags (FLAGS_SHARED, etc.)
1719 * @q: the associated futex_q 1723 * @q: the associated futex_q
1720 * @hb: storage for hash_bucket pointer to be returned to caller 1724 * @hb: storage for hash_bucket pointer to be returned to caller
1721 * 1725 *
@@ -1728,7 +1732,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1728 * 0 - uaddr contains val and hb has been locked 1732 * 0 - uaddr contains val and hb has been locked
1729 * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked 1733 * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
1730 */ 1734 */
1731static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, 1735static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1732 struct futex_q *q, struct futex_hash_bucket **hb) 1736 struct futex_q *q, struct futex_hash_bucket **hb)
1733{ 1737{
1734 u32 uval; 1738 u32 uval;
@@ -1752,8 +1756,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
1752 * rare, but normal. 1756 * rare, but normal.
1753 */ 1757 */
1754retry: 1758retry:
1755 q->key = FUTEX_KEY_INIT; 1759 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
1756 ret = get_futex_key(uaddr, fshared, &q->key);
1757 if (unlikely(ret != 0)) 1760 if (unlikely(ret != 0))
1758 return ret; 1761 return ret;
1759 1762
@@ -1769,10 +1772,10 @@ retry_private:
1769 if (ret) 1772 if (ret)
1770 goto out; 1773 goto out;
1771 1774
1772 if (!fshared) 1775 if (!(flags & FLAGS_SHARED))
1773 goto retry_private; 1776 goto retry_private;
1774 1777
1775 put_futex_key(fshared, &q->key); 1778 put_futex_key(&q->key);
1776 goto retry; 1779 goto retry;
1777 } 1780 }
1778 1781
@@ -1783,32 +1786,29 @@ retry_private:
1783 1786
1784out: 1787out:
1785 if (ret) 1788 if (ret)
1786 put_futex_key(fshared, &q->key); 1789 put_futex_key(&q->key);
1787 return ret; 1790 return ret;
1788} 1791}
1789 1792
1790static int futex_wait(u32 __user *uaddr, int fshared, 1793static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
1791 u32 val, ktime_t *abs_time, u32 bitset, int clockrt) 1794 ktime_t *abs_time, u32 bitset)
1792{ 1795{
1793 struct hrtimer_sleeper timeout, *to = NULL; 1796 struct hrtimer_sleeper timeout, *to = NULL;
1794 struct restart_block *restart; 1797 struct restart_block *restart;
1795 struct futex_hash_bucket *hb; 1798 struct futex_hash_bucket *hb;
1796 struct futex_q q; 1799 struct futex_q q = futex_q_init;
1797 int ret; 1800 int ret;
1798 1801
1799 if (!bitset) 1802 if (!bitset)
1800 return -EINVAL; 1803 return -EINVAL;
1801
1802 q.pi_state = NULL;
1803 q.bitset = bitset; 1804 q.bitset = bitset;
1804 q.rt_waiter = NULL;
1805 q.requeue_pi_key = NULL;
1806 1805
1807 if (abs_time) { 1806 if (abs_time) {
1808 to = &timeout; 1807 to = &timeout;
1809 1808
1810 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : 1809 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
1811 CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1810 CLOCK_REALTIME : CLOCK_MONOTONIC,
1811 HRTIMER_MODE_ABS);
1812 hrtimer_init_sleeper(to, current); 1812 hrtimer_init_sleeper(to, current);
1813 hrtimer_set_expires_range_ns(&to->timer, *abs_time, 1813 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1814 current->timer_slack_ns); 1814 current->timer_slack_ns);
@@ -1819,7 +1819,7 @@ retry:
1819 * Prepare to wait on uaddr. On success, holds hb lock and increments 1819 * Prepare to wait on uaddr. On success, holds hb lock and increments
1820 * q.key refs. 1820 * q.key refs.
1821 */ 1821 */
1822 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); 1822 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
1823 if (ret) 1823 if (ret)
1824 goto out; 1824 goto out;
1825 1825
@@ -1852,12 +1852,7 @@ retry:
1852 restart->futex.val = val; 1852 restart->futex.val = val;
1853 restart->futex.time = abs_time->tv64; 1853 restart->futex.time = abs_time->tv64;
1854 restart->futex.bitset = bitset; 1854 restart->futex.bitset = bitset;
1855 restart->futex.flags = FLAGS_HAS_TIMEOUT; 1855 restart->futex.flags = flags;
1856
1857 if (fshared)
1858 restart->futex.flags |= FLAGS_SHARED;
1859 if (clockrt)
1860 restart->futex.flags |= FLAGS_CLOCKRT;
1861 1856
1862 ret = -ERESTART_RESTARTBLOCK; 1857 ret = -ERESTART_RESTARTBLOCK;
1863 1858
@@ -1873,7 +1868,6 @@ out:
1873static long futex_wait_restart(struct restart_block *restart) 1868static long futex_wait_restart(struct restart_block *restart)
1874{ 1869{
1875 u32 __user *uaddr = restart->futex.uaddr; 1870 u32 __user *uaddr = restart->futex.uaddr;
1876 int fshared = 0;
1877 ktime_t t, *tp = NULL; 1871 ktime_t t, *tp = NULL;
1878 1872
1879 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { 1873 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
@@ -1881,11 +1875,9 @@ static long futex_wait_restart(struct restart_block *restart)
1881 tp = &t; 1875 tp = &t;
1882 } 1876 }
1883 restart->fn = do_no_restart_syscall; 1877 restart->fn = do_no_restart_syscall;
1884 if (restart->futex.flags & FLAGS_SHARED) 1878
1885 fshared = 1; 1879 return (long)futex_wait(uaddr, restart->futex.flags,
1886 return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, 1880 restart->futex.val, tp, restart->futex.bitset);
1887 restart->futex.bitset,
1888 restart->futex.flags & FLAGS_CLOCKRT);
1889} 1881}
1890 1882
1891 1883
@@ -1895,12 +1887,12 @@ static long futex_wait_restart(struct restart_block *restart)
1895 * if there are waiters then it will block, it does PI, etc. (Due to 1887 * if there are waiters then it will block, it does PI, etc. (Due to
1896 * races the kernel might see a 0 value of the futex too.) 1888 * races the kernel might see a 0 value of the futex too.)
1897 */ 1889 */
1898static int futex_lock_pi(u32 __user *uaddr, int fshared, 1890static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
1899 int detect, ktime_t *time, int trylock) 1891 ktime_t *time, int trylock)
1900{ 1892{
1901 struct hrtimer_sleeper timeout, *to = NULL; 1893 struct hrtimer_sleeper timeout, *to = NULL;
1902 struct futex_hash_bucket *hb; 1894 struct futex_hash_bucket *hb;
1903 struct futex_q q; 1895 struct futex_q q = futex_q_init;
1904 int res, ret; 1896 int res, ret;
1905 1897
1906 if (refill_pi_state_cache()) 1898 if (refill_pi_state_cache())
@@ -1914,12 +1906,8 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1914 hrtimer_set_expires(&to->timer, *time); 1906 hrtimer_set_expires(&to->timer, *time);
1915 } 1907 }
1916 1908
1917 q.pi_state = NULL;
1918 q.rt_waiter = NULL;
1919 q.requeue_pi_key = NULL;
1920retry: 1909retry:
1921 q.key = FUTEX_KEY_INIT; 1910 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key);
1922 ret = get_futex_key(uaddr, fshared, &q.key);
1923 if (unlikely(ret != 0)) 1911 if (unlikely(ret != 0))
1924 goto out; 1912 goto out;
1925 1913
@@ -1941,7 +1929,7 @@ retry_private:
1941 * exit to complete. 1929 * exit to complete.
1942 */ 1930 */
1943 queue_unlock(&q, hb); 1931 queue_unlock(&q, hb);
1944 put_futex_key(fshared, &q.key); 1932 put_futex_key(&q.key);
1945 cond_resched(); 1933 cond_resched();
1946 goto retry; 1934 goto retry;
1947 default: 1935 default:
@@ -1971,7 +1959,7 @@ retry_private:
1971 * Fixup the pi_state owner and possibly acquire the lock if we 1959 * Fixup the pi_state owner and possibly acquire the lock if we
1972 * haven't already. 1960 * haven't already.
1973 */ 1961 */
1974 res = fixup_owner(uaddr, fshared, &q, !ret); 1962 res = fixup_owner(uaddr, &q, !ret);
1975 /* 1963 /*
1976 * If fixup_owner() returned an error, propagate that. If it acquired 1964 * If fixup_owner() returned an error, propagate that. If it acquired
1977 * the lock, clear our -ETIMEDOUT or -EINTR. 1965 * the lock, clear our -ETIMEDOUT or -EINTR.
@@ -1995,7 +1983,7 @@ out_unlock_put_key:
1995 queue_unlock(&q, hb); 1983 queue_unlock(&q, hb);
1996 1984
1997out_put_key: 1985out_put_key:
1998 put_futex_key(fshared, &q.key); 1986 put_futex_key(&q.key);
1999out: 1987out:
2000 if (to) 1988 if (to)
2001 destroy_hrtimer_on_stack(&to->timer); 1989 destroy_hrtimer_on_stack(&to->timer);
@@ -2008,10 +1996,10 @@ uaddr_faulted:
2008 if (ret) 1996 if (ret)
2009 goto out_put_key; 1997 goto out_put_key;
2010 1998
2011 if (!fshared) 1999 if (!(flags & FLAGS_SHARED))
2012 goto retry_private; 2000 goto retry_private;
2013 2001
2014 put_futex_key(fshared, &q.key); 2002 put_futex_key(&q.key);
2015 goto retry; 2003 goto retry;
2016} 2004}
2017 2005
@@ -2020,7 +2008,7 @@ uaddr_faulted:
2020 * This is the in-kernel slowpath: we look up the PI state (if any), 2008 * This is the in-kernel slowpath: we look up the PI state (if any),
2021 * and do the rt-mutex unlock. 2009 * and do the rt-mutex unlock.
2022 */ 2010 */
2023static int futex_unlock_pi(u32 __user *uaddr, int fshared) 2011static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2024{ 2012{
2025 struct futex_hash_bucket *hb; 2013 struct futex_hash_bucket *hb;
2026 struct futex_q *this, *next; 2014 struct futex_q *this, *next;
@@ -2038,7 +2026,7 @@ retry:
2038 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 2026 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
2039 return -EPERM; 2027 return -EPERM;
2040 2028
2041 ret = get_futex_key(uaddr, fshared, &key); 2029 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
2042 if (unlikely(ret != 0)) 2030 if (unlikely(ret != 0))
2043 goto out; 2031 goto out;
2044 2032
@@ -2093,14 +2081,14 @@ retry:
2093 2081
2094out_unlock: 2082out_unlock:
2095 spin_unlock(&hb->lock); 2083 spin_unlock(&hb->lock);
2096 put_futex_key(fshared, &key); 2084 put_futex_key(&key);
2097 2085
2098out: 2086out:
2099 return ret; 2087 return ret;
2100 2088
2101pi_faulted: 2089pi_faulted:
2102 spin_unlock(&hb->lock); 2090 spin_unlock(&hb->lock);
2103 put_futex_key(fshared, &key); 2091 put_futex_key(&key);
2104 2092
2105 ret = fault_in_user_writeable(uaddr); 2093 ret = fault_in_user_writeable(uaddr);
2106 if (!ret) 2094 if (!ret)
@@ -2160,7 +2148,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2160/** 2148/**
2161 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 2149 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
2162 * @uaddr: the futex we initially wait on (non-pi) 2150 * @uaddr: the futex we initially wait on (non-pi)
2163 * @fshared: whether the futexes are shared (1) or not (0). They must be 2151 * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.); they must be
2164 * the same type, no requeueing from private to shared, etc. 2152 * the same type, no requeueing from private to shared, etc.
2165 * @val: the expected value of uaddr 2153 * @val: the expected value of uaddr
2166 * @abs_time: absolute timeout 2154 * @abs_time: absolute timeout
@@ -2198,16 +2186,16 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2198 * 0 - On success 2186 * 0 - On success
2199 * <0 - On error 2187 * <0 - On error
2200 */ 2188 */
2201static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, 2189static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2202 u32 val, ktime_t *abs_time, u32 bitset, 2190 u32 val, ktime_t *abs_time, u32 bitset,
2203 int clockrt, u32 __user *uaddr2) 2191 u32 __user *uaddr2)
2204{ 2192{
2205 struct hrtimer_sleeper timeout, *to = NULL; 2193 struct hrtimer_sleeper timeout, *to = NULL;
2206 struct rt_mutex_waiter rt_waiter; 2194 struct rt_mutex_waiter rt_waiter;
2207 struct rt_mutex *pi_mutex = NULL; 2195 struct rt_mutex *pi_mutex = NULL;
2208 struct futex_hash_bucket *hb; 2196 struct futex_hash_bucket *hb;
2209 union futex_key key2; 2197 union futex_key key2 = FUTEX_KEY_INIT;
2210 struct futex_q q; 2198 struct futex_q q = futex_q_init;
2211 int res, ret; 2199 int res, ret;
2212 2200
2213 if (!bitset) 2201 if (!bitset)
@@ -2215,8 +2203,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2215 2203
2216 if (abs_time) { 2204 if (abs_time) {
2217 to = &timeout; 2205 to = &timeout;
2218 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : 2206 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2219 CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 2207 CLOCK_REALTIME : CLOCK_MONOTONIC,
2208 HRTIMER_MODE_ABS);
2220 hrtimer_init_sleeper(to, current); 2209 hrtimer_init_sleeper(to, current);
2221 hrtimer_set_expires_range_ns(&to->timer, *abs_time, 2210 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2222 current->timer_slack_ns); 2211 current->timer_slack_ns);
@@ -2229,12 +2218,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2229 debug_rt_mutex_init_waiter(&rt_waiter); 2218 debug_rt_mutex_init_waiter(&rt_waiter);
2230 rt_waiter.task = NULL; 2219 rt_waiter.task = NULL;
2231 2220
2232 key2 = FUTEX_KEY_INIT; 2221 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
2233 ret = get_futex_key(uaddr2, fshared, &key2);
2234 if (unlikely(ret != 0)) 2222 if (unlikely(ret != 0))
2235 goto out; 2223 goto out;
2236 2224
2237 q.pi_state = NULL;
2238 q.bitset = bitset; 2225 q.bitset = bitset;
2239 q.rt_waiter = &rt_waiter; 2226 q.rt_waiter = &rt_waiter;
2240 q.requeue_pi_key = &key2; 2227 q.requeue_pi_key = &key2;
@@ -2243,7 +2230,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2243 * Prepare to wait on uaddr. On success, increments q.key (key1) ref 2230 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
2244 * count. 2231 * count.
2245 */ 2232 */
2246 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); 2233 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2247 if (ret) 2234 if (ret)
2248 goto out_key2; 2235 goto out_key2;
2249 2236
@@ -2273,8 +2260,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2273 */ 2260 */
2274 if (q.pi_state && (q.pi_state->owner != current)) { 2261 if (q.pi_state && (q.pi_state->owner != current)) {
2275 spin_lock(q.lock_ptr); 2262 spin_lock(q.lock_ptr);
2276 ret = fixup_pi_state_owner(uaddr2, &q, current, 2263 ret = fixup_pi_state_owner(uaddr2, &q, current);
2277 fshared);
2278 spin_unlock(q.lock_ptr); 2264 spin_unlock(q.lock_ptr);
2279 } 2265 }
2280 } else { 2266 } else {
@@ -2293,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2293 * Fixup the pi_state owner and possibly acquire the lock if we 2279 * Fixup the pi_state owner and possibly acquire the lock if we
2294 * haven't already. 2280 * haven't already.
2295 */ 2281 */
2296 res = fixup_owner(uaddr2, fshared, &q, !ret); 2282 res = fixup_owner(uaddr2, &q, !ret);
2297 /* 2283 /*
2298 * If fixup_owner() returned an error, propagate that. If it 2284 * If fixup_owner() returned an error, propagate that. If it
2299 * acquired the lock, clear -ETIMEDOUT or -EINTR. 2285 * acquired the lock, clear -ETIMEDOUT or -EINTR.
@@ -2324,9 +2310,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2324 } 2310 }
2325 2311
2326out_put_keys: 2312out_put_keys:
2327 put_futex_key(fshared, &q.key); 2313 put_futex_key(&q.key);
2328out_key2: 2314out_key2:
2329 put_futex_key(fshared, &key2); 2315 put_futex_key(&key2);
2330 2316
2331out: 2317out:
2332 if (to) { 2318 if (to) {
@@ -2551,58 +2537,57 @@ void exit_robust_list(struct task_struct *curr)
2551long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, 2537long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2552 u32 __user *uaddr2, u32 val2, u32 val3) 2538 u32 __user *uaddr2, u32 val2, u32 val3)
2553{ 2539{
2554 int clockrt, ret = -ENOSYS; 2540 int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
2555 int cmd = op & FUTEX_CMD_MASK; 2541 unsigned int flags = 0;
2556 int fshared = 0;
2557 2542
2558 if (!(op & FUTEX_PRIVATE_FLAG)) 2543 if (!(op & FUTEX_PRIVATE_FLAG))
2559 fshared = 1; 2544 flags |= FLAGS_SHARED;
2560 2545
2561 clockrt = op & FUTEX_CLOCK_REALTIME; 2546 if (op & FUTEX_CLOCK_REALTIME) {
2562 if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) 2547 flags |= FLAGS_CLOCKRT;
2563 return -ENOSYS; 2548 if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
2549 return -ENOSYS;
2550 }
2564 2551
2565 switch (cmd) { 2552 switch (cmd) {
2566 case FUTEX_WAIT: 2553 case FUTEX_WAIT:
2567 val3 = FUTEX_BITSET_MATCH_ANY; 2554 val3 = FUTEX_BITSET_MATCH_ANY;
2568 case FUTEX_WAIT_BITSET: 2555 case FUTEX_WAIT_BITSET:
2569 ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); 2556 ret = futex_wait(uaddr, flags, val, timeout, val3);
2570 break; 2557 break;
2571 case FUTEX_WAKE: 2558 case FUTEX_WAKE:
2572 val3 = FUTEX_BITSET_MATCH_ANY; 2559 val3 = FUTEX_BITSET_MATCH_ANY;
2573 case FUTEX_WAKE_BITSET: 2560 case FUTEX_WAKE_BITSET:
2574 ret = futex_wake(uaddr, fshared, val, val3); 2561 ret = futex_wake(uaddr, flags, val, val3);
2575 break; 2562 break;
2576 case FUTEX_REQUEUE: 2563 case FUTEX_REQUEUE:
2577 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); 2564 ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
2578 break; 2565 break;
2579 case FUTEX_CMP_REQUEUE: 2566 case FUTEX_CMP_REQUEUE:
2580 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 2567 ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
2581 0);
2582 break; 2568 break;
2583 case FUTEX_WAKE_OP: 2569 case FUTEX_WAKE_OP:
2584 ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); 2570 ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2585 break; 2571 break;
2586 case FUTEX_LOCK_PI: 2572 case FUTEX_LOCK_PI:
2587 if (futex_cmpxchg_enabled) 2573 if (futex_cmpxchg_enabled)
2588 ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); 2574 ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
2589 break; 2575 break;
2590 case FUTEX_UNLOCK_PI: 2576 case FUTEX_UNLOCK_PI:
2591 if (futex_cmpxchg_enabled) 2577 if (futex_cmpxchg_enabled)
2592 ret = futex_unlock_pi(uaddr, fshared); 2578 ret = futex_unlock_pi(uaddr, flags);
2593 break; 2579 break;
2594 case FUTEX_TRYLOCK_PI: 2580 case FUTEX_TRYLOCK_PI:
2595 if (futex_cmpxchg_enabled) 2581 if (futex_cmpxchg_enabled)
2596 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); 2582 ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
2597 break; 2583 break;
2598 case FUTEX_WAIT_REQUEUE_PI: 2584 case FUTEX_WAIT_REQUEUE_PI:
2599 val3 = FUTEX_BITSET_MATCH_ANY; 2585 val3 = FUTEX_BITSET_MATCH_ANY;
2600 ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, 2586 ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
2601 clockrt, uaddr2); 2587 uaddr2);
2602 break; 2588 break;
2603 case FUTEX_CMP_REQUEUE_PI: 2589 case FUTEX_CMP_REQUEUE_PI:
2604 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 2590 ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
2605 1);
2606 break; 2591 break;
2607 default: 2592 default:
2608 ret = -ENOSYS; 2593 ret = -ENOSYS;
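The whole futex series above converges on one idea: fold the fshared/clockrt booleans into a single flags word, so every call site — and especially the restart_block path — can stash and replay the options verbatim instead of re-deriving them. A compact sketch of that encode/decode shape; the FLAGS_* values mirror the diff, but the op-word constants and function below are assumptions for illustration:

#include <stdio.h>

#define FLAGS_SHARED            0x01u
#define FLAGS_CLOCKRT           0x02u
#define FLAGS_HAS_TIMEOUT       0x04u

#define OP_PRIVATE_FLAG         0x80u   /* stand-in for FUTEX_PRIVATE_FLAG */
#define OP_CLOCK_REALTIME       0x100u  /* stand-in for FUTEX_CLOCK_REALTIME */

static void futex_wait_sketch(unsigned int flags)
{
        printf("shared=%u clockrt=%u timeout=%u\n",
               !!(flags & FLAGS_SHARED),
               !!(flags & FLAGS_CLOCKRT),
               !!(flags & FLAGS_HAS_TIMEOUT));
}

int main(void)
{
        unsigned int op = OP_CLOCK_REALTIME;    /* no PRIVATE bit set */
        unsigned int flags = 0;

        /* do_futex()-style decode of the op word into one flags word */
        if (!(op & OP_PRIVATE_FLAG))
                flags |= FLAGS_SHARED;
        if (op & OP_CLOCK_REALTIME)
                flags |= FLAGS_CLOCKRT;
        flags |= FLAGS_HAS_TIMEOUT;

        /* one word to pass down and to save in restart->futex.flags */
        futex_wait_sketch(flags);
        return 0;
}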
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 72206cf5c6cf..f2429fc3438c 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -516,10 +516,13 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
516 516
517 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { 517 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
518 struct hrtimer *timer; 518 struct hrtimer *timer;
519 struct timerqueue_node *next;
519 520
520 if (!base->first) 521 next = timerqueue_getnext(&base->active);
522 if (!next)
521 continue; 523 continue;
522 timer = rb_entry(base->first, struct hrtimer, node); 524 timer = container_of(next, struct hrtimer, node);
525
523 expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 526 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
524 /* 527 /*
525 * clock_was_set() has changed base->offset so the 528 * clock_was_set() has changed base->offset so the
@@ -840,48 +843,17 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
840static int enqueue_hrtimer(struct hrtimer *timer, 843static int enqueue_hrtimer(struct hrtimer *timer,
841 struct hrtimer_clock_base *base) 844 struct hrtimer_clock_base *base)
842{ 845{
843 struct rb_node **link = &base->active.rb_node;
844 struct rb_node *parent = NULL;
845 struct hrtimer *entry;
846 int leftmost = 1;
847
848 debug_activate(timer); 846 debug_activate(timer);
849 847
850 /* 848 timerqueue_add(&base->active, &timer->node);
851 * Find the right place in the rbtree:
852 */
853 while (*link) {
854 parent = *link;
855 entry = rb_entry(parent, struct hrtimer, node);
856 /*
 857 * We don't care about collisions. Nodes with
858 * the same expiry time stay together.
859 */
860 if (hrtimer_get_expires_tv64(timer) <
861 hrtimer_get_expires_tv64(entry)) {
862 link = &(*link)->rb_left;
863 } else {
864 link = &(*link)->rb_right;
865 leftmost = 0;
866 }
867 }
868
869 /*
870 * Insert the timer to the rbtree and check whether it
871 * replaces the first pending timer
872 */
873 if (leftmost)
874 base->first = &timer->node;
875 849
876 rb_link_node(&timer->node, parent, link);
877 rb_insert_color(&timer->node, &base->active);
878 /* 850 /*
879 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the 851 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
880 * state of a possibly running callback. 852 * state of a possibly running callback.
881 */ 853 */
882 timer->state |= HRTIMER_STATE_ENQUEUED; 854 timer->state |= HRTIMER_STATE_ENQUEUED;
883 855
884 return leftmost; 856 return (&timer->node == base->active.next);
885} 857}
886 858
887/* 859/*
@@ -901,12 +873,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
901 if (!(timer->state & HRTIMER_STATE_ENQUEUED)) 873 if (!(timer->state & HRTIMER_STATE_ENQUEUED))
902 goto out; 874 goto out;
903 875
904 /* 876 if (&timer->node == timerqueue_getnext(&base->active)) {
905 * Remove the timer from the rbtree and replace the first
906 * entry pointer if necessary.
907 */
908 if (base->first == &timer->node) {
909 base->first = rb_next(&timer->node);
910#ifdef CONFIG_HIGH_RES_TIMERS 877#ifdef CONFIG_HIGH_RES_TIMERS
 911 /* Reprogram the clock event device, if enabled */ 878 /* Reprogram the clock event device, if enabled */
912 if (reprogram && hrtimer_hres_active()) { 879 if (reprogram && hrtimer_hres_active()) {
@@ -919,7 +886,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
919 } 886 }
920#endif 887#endif
921 } 888 }
922 rb_erase(&timer->node, &base->active); 889 timerqueue_del(&base->active, &timer->node);
923out: 890out:
924 timer->state = newstate; 891 timer->state = newstate;
925} 892}
@@ -1128,11 +1095,13 @@ ktime_t hrtimer_get_next_event(void)
1128 if (!hrtimer_hres_active()) { 1095 if (!hrtimer_hres_active()) {
1129 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { 1096 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
1130 struct hrtimer *timer; 1097 struct hrtimer *timer;
1098 struct timerqueue_node *next;
1131 1099
1132 if (!base->first) 1100 next = timerqueue_getnext(&base->active);
1101 if (!next)
1133 continue; 1102 continue;
1134 1103
1135 timer = rb_entry(base->first, struct hrtimer, node); 1104 timer = container_of(next, struct hrtimer, node);
1136 delta.tv64 = hrtimer_get_expires_tv64(timer); 1105 delta.tv64 = hrtimer_get_expires_tv64(timer);
1137 delta = ktime_sub(delta, base->get_time()); 1106 delta = ktime_sub(delta, base->get_time());
1138 if (delta.tv64 < mindelta.tv64) 1107 if (delta.tv64 < mindelta.tv64)
@@ -1162,6 +1131,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1162 1131
1163 timer->base = &cpu_base->clock_base[clock_id]; 1132 timer->base = &cpu_base->clock_base[clock_id];
1164 hrtimer_init_timer_hres(timer); 1133 hrtimer_init_timer_hres(timer);
1134 timerqueue_init(&timer->node);
1165 1135
1166#ifdef CONFIG_TIMER_STATS 1136#ifdef CONFIG_TIMER_STATS
1167 timer->start_site = NULL; 1137 timer->start_site = NULL;
@@ -1278,14 +1248,14 @@ retry:
1278 1248
1279 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 1249 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1280 ktime_t basenow; 1250 ktime_t basenow;
1281 struct rb_node *node; 1251 struct timerqueue_node *node;
1282 1252
1283 basenow = ktime_add(now, base->offset); 1253 basenow = ktime_add(now, base->offset);
1284 1254
1285 while ((node = base->first)) { 1255 while ((node = timerqueue_getnext(&base->active))) {
1286 struct hrtimer *timer; 1256 struct hrtimer *timer;
1287 1257
1288 timer = rb_entry(node, struct hrtimer, node); 1258 timer = container_of(node, struct hrtimer, node);
1289 1259
1290 /* 1260 /*
1291 * The immediate goal for using the softexpires is 1261 * The immediate goal for using the softexpires is
@@ -1441,7 +1411,7 @@ void hrtimer_run_pending(void)
1441 */ 1411 */
1442void hrtimer_run_queues(void) 1412void hrtimer_run_queues(void)
1443{ 1413{
1444 struct rb_node *node; 1414 struct timerqueue_node *node;
1445 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); 1415 struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
1446 struct hrtimer_clock_base *base; 1416 struct hrtimer_clock_base *base;
1447 int index, gettime = 1; 1417 int index, gettime = 1;
@@ -1451,8 +1421,7 @@ void hrtimer_run_queues(void)
1451 1421
1452 for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { 1422 for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
1453 base = &cpu_base->clock_base[index]; 1423 base = &cpu_base->clock_base[index];
1454 1424 if (!timerqueue_getnext(&base->active))
1455 if (!base->first)
1456 continue; 1425 continue;
1457 1426
1458 if (gettime) { 1427 if (gettime) {
@@ -1462,10 +1431,10 @@ void hrtimer_run_queues(void)
1462 1431
1463 raw_spin_lock(&cpu_base->lock); 1432 raw_spin_lock(&cpu_base->lock);
1464 1433
1465 while ((node = base->first)) { 1434 while ((node = timerqueue_getnext(&base->active))) {
1466 struct hrtimer *timer; 1435 struct hrtimer *timer;
1467 1436
1468 timer = rb_entry(node, struct hrtimer, node); 1437 timer = container_of(node, struct hrtimer, node);
1469 if (base->softirq_time.tv64 <= 1438 if (base->softirq_time.tv64 <=
1470 hrtimer_get_expires_tv64(timer)) 1439 hrtimer_get_expires_tv64(timer))
1471 break; 1440 break;
@@ -1630,8 +1599,10 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
1630 1599
1631 raw_spin_lock_init(&cpu_base->lock); 1600 raw_spin_lock_init(&cpu_base->lock);
1632 1601
1633 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) 1602 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1634 cpu_base->clock_base[i].cpu_base = cpu_base; 1603 cpu_base->clock_base[i].cpu_base = cpu_base;
1604 timerqueue_init_head(&cpu_base->clock_base[i].active);
1605 }
1635 1606
1636 hrtimer_init_hres(cpu_base); 1607 hrtimer_init_hres(cpu_base);
1637} 1608}
@@ -1642,10 +1613,10 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
1642 struct hrtimer_clock_base *new_base) 1613 struct hrtimer_clock_base *new_base)
1643{ 1614{
1644 struct hrtimer *timer; 1615 struct hrtimer *timer;
1645 struct rb_node *node; 1616 struct timerqueue_node *node;
1646 1617
1647 while ((node = rb_first(&old_base->active))) { 1618 while ((node = timerqueue_getnext(&old_base->active))) {
1648 timer = rb_entry(node, struct hrtimer, node); 1619 timer = container_of(node, struct hrtimer, node);
1649 BUG_ON(hrtimer_callback_running(timer)); 1620 BUG_ON(hrtimer_callback_running(timer));
1650 debug_deactivate(timer); 1621 debug_deactivate(timer);
1651 1622
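The hrtimer hunks above stop maintaining base->first and the rbtree by hand and delegate to the timerqueue helpers (timerqueue_add/del/getnext), which keep the earliest-expiring node cached. A toy userspace rendering of that interface shape — a sorted list where the kernel uses an rbtree; illustrative only, not the kernel API:

#include <stdio.h>

struct tq_node {
        long long expires;
        struct tq_node *next;
};

struct tq_head {
        struct tq_node *first;          /* cached earliest deadline */
};

static void tq_add(struct tq_head *h, struct tq_node *n)
{
        struct tq_node **p = &h->first;

        /* keep ascending order; equal expiries stay together (FIFO) */
        while (*p && (*p)->expires <= n->expires)
                p = &(*p)->next;
        n->next = *p;
        *p = n;
}

static struct tq_node *tq_getnext(struct tq_head *h)
{
        return h->first;                /* O(1), no tree walk at callers */
}

int main(void)
{
        struct tq_head head = { 0 };
        struct tq_node a = { .expires = 300 }, b = { .expires = 100 };

        tq_add(&head, &a);
        tq_add(&head, &b);
        printf("next expiry: %lld\n", tq_getnext(&head)->expires); /* 100 */
        return 0;
}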
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index e5325825aeb6..086adf25a55e 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void)
641 641
642 constraints_initialized = 1; 642 constraints_initialized = 1;
643 643
644 perf_pmu_register(&perf_breakpoint); 644 perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
645 645
646 return register_die_notifier(&hw_breakpoint_exceptions_nb); 646 return register_die_notifier(&hw_breakpoint_exceptions_nb);
647 647
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5f92acc5f952..91a5fa25054e 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -577,7 +577,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
577 */ 577 */
578static int irq_thread(void *data) 578static int irq_thread(void *data)
579{ 579{
580 struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; 580 static struct sched_param param = {
581 .sched_priority = MAX_USER_RT_PRIO/2,
582 };
581 struct irqaction *action = data; 583 struct irqaction *action = data;
582 struct irq_desc *desc = irq_to_desc(action->irq); 584 struct irq_desc *desc = irq_to_desc(action->irq);
583 int wake, oneshot = desc->status & IRQ_ONESHOT; 585 int wake, oneshot = desc->status & IRQ_ONESHOT;
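The irq_thread() change above makes the never-modified parameter block static, so it lives in the kernel image rather than being rebuilt on every thread's stack at startup. The same idea from userspace with the POSIX sched_setscheduler() call; priority 50 is an arbitrary choice, and SCHED_FIFO needs privileges:

#include <sched.h>
#include <stdio.h>

int main(void)
{
        /* static const: one copy in .rodata, nothing assembled on the
         * stack each time this path runs */
        static const struct sched_param param = {
                .sched_priority = 50,
        };

        if (sched_setscheduler(0, SCHED_FIFO, &param))
                perror("sched_setscheduler");   /* needs CAP_SYS_NICE */
        else
                puts("running SCHED_FIFO at priority 50");
        return 0;
}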
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9737a76e106f..7663e5df0e6f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -354,13 +354,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
354 return p->pre_handler == aggr_pre_handler; 354 return p->pre_handler == aggr_pre_handler;
355} 355}
356 356
357/* Return true(!0) if the kprobe is unused */
358static inline int kprobe_unused(struct kprobe *p)
359{
360 return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
361 list_empty(&p->list);
362}
363
357/* 364/*
358 * Keep all fields in the kprobe consistent 365 * Keep all fields in the kprobe consistent
359 */ 366 */
360static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) 367static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
361{ 368{
362 memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); 369 memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
363 memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); 370 memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
364} 371}
365 372
366#ifdef CONFIG_OPTPROBES 373#ifdef CONFIG_OPTPROBES
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
384 } 391 }
385} 392}
386 393
394/* Free optimized instructions and optimized_kprobe */
395static __kprobes void free_aggr_kprobe(struct kprobe *p)
396{
397 struct optimized_kprobe *op;
398
399 op = container_of(p, struct optimized_kprobe, kp);
400 arch_remove_optimized_kprobe(op);
401 arch_remove_kprobe(p);
402 kfree(op);
403}
404
387/* Return true(!0) if the kprobe is ready for optimization. */ 405/* Return true(!0) if the kprobe is ready for optimization. */
388static inline int kprobe_optready(struct kprobe *p) 406static inline int kprobe_optready(struct kprobe *p)
389{ 407{
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
397 return 0; 415 return 0;
398} 416}
399 417
418/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
419static inline int kprobe_disarmed(struct kprobe *p)
420{
421 struct optimized_kprobe *op;
422
423 /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
424 if (!kprobe_aggrprobe(p))
425 return kprobe_disabled(p);
426
427 op = container_of(p, struct optimized_kprobe, kp);
428
429 return kprobe_disabled(p) && list_empty(&op->list);
430}
431
432/* Return true(!0) if the probe is queued on (un)optimizing lists */
433static int __kprobes kprobe_queued(struct kprobe *p)
434{
435 struct optimized_kprobe *op;
436
437 if (kprobe_aggrprobe(p)) {
438 op = container_of(p, struct optimized_kprobe, kp);
439 if (!list_empty(&op->list))
440 return 1;
441 }
442 return 0;
443}
444
400/* 445/*
401 * Return an optimized kprobe whose optimizing code replaces 446 * Return an optimized kprobe whose optimizing code replaces
402 * instructions including addr (exclude breakpoint). 447 * instructions including addr (exclude breakpoint).
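kprobe_disarmed() and kprobe_queued() above climb from the embedded struct kprobe back to its enclosing optimized_kprobe with container_of(). A self-contained rendering of that pointer arithmetic; the macro below is equivalent in spirit to the kernel's, and the struct layout is a simplified stand-in:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct kprobe { unsigned long flags; };

struct optimized_kprobe {
        int state;
        struct kprobe kp;               /* embedded member */
};

int main(void)
{
        struct optimized_kprobe op = { .state = 42 };
        struct kprobe *p = &op.kp;      /* what callers hand around */

        /* subtract the member offset to recover the container */
        struct optimized_kprobe *back =
                container_of(p, struct optimized_kprobe, kp);

        printf("state=%d\n", back->state);      /* 42 */
        return 0;
}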
@@ -422,30 +467,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
422 467
423/* Optimization staging list, protected by kprobe_mutex */ 468/* Optimization staging list, protected by kprobe_mutex */
424static LIST_HEAD(optimizing_list); 469static LIST_HEAD(optimizing_list);
470static LIST_HEAD(unoptimizing_list);
425 471
426static void kprobe_optimizer(struct work_struct *work); 472static void kprobe_optimizer(struct work_struct *work);
427static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); 473static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
474static DECLARE_COMPLETION(optimizer_comp);
428#define OPTIMIZE_DELAY 5 475#define OPTIMIZE_DELAY 5
429 476
430/* Kprobe jump optimizer */ 477/*
431static __kprobes void kprobe_optimizer(struct work_struct *work) 478 * Optimize (replace a breakpoint with a jump) kprobes listed on
479 * optimizing_list.
480 */
481static __kprobes void do_optimize_kprobes(void)
432{ 482{
 433 struct optimized_kprobe *op, *tmp; 483 /* Optimization is never done when disarmed */
434 484 if (kprobes_all_disarmed || !kprobes_allow_optimization ||
435 /* Lock modules while optimizing kprobes */ 485 list_empty(&optimizing_list))
436 mutex_lock(&module_mutex); 486 return;
437 mutex_lock(&kprobe_mutex);
438 if (kprobes_all_disarmed || !kprobes_allow_optimization)
439 goto end;
440
441 /*
 442 * Wait for quiescence period to ensure all running interrupts
443 * are done. Because optprobe may modify multiple instructions
444 * there is a chance that Nth instruction is interrupted. In that
445 * case, running interrupt can return to 2nd-Nth byte of jump
446 * instruction. This wait is for avoiding it.
447 */
448 synchronize_sched();
449 487
450 /* 488 /*
451 * The optimization/unoptimization refers online_cpus via 489 * The optimization/unoptimization refers online_cpus via
@@ -459,17 +497,111 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
459 */ 497 */
460 get_online_cpus(); 498 get_online_cpus();
461 mutex_lock(&text_mutex); 499 mutex_lock(&text_mutex);
462 list_for_each_entry_safe(op, tmp, &optimizing_list, list) { 500 arch_optimize_kprobes(&optimizing_list);
463 WARN_ON(kprobe_disabled(&op->kp)); 501 mutex_unlock(&text_mutex);
464 if (arch_optimize_kprobe(op) < 0) 502 put_online_cpus();
465 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 503}
466 list_del_init(&op->list); 504
505/*
506 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
 507 * if needed) kprobes listed on unoptimizing_list.
508 */
509static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
510{
511 struct optimized_kprobe *op, *tmp;
512
 513 /* Unoptimization must always be done, even when kprobes are disarmed */
514 if (list_empty(&unoptimizing_list))
515 return;
516
 517 /* Ditto for do_optimize_kprobes */
518 get_online_cpus();
519 mutex_lock(&text_mutex);
520 arch_unoptimize_kprobes(&unoptimizing_list, free_list);
521 /* Loop free_list for disarming */
522 list_for_each_entry_safe(op, tmp, free_list, list) {
523 /* Disarm probes if marked disabled */
524 if (kprobe_disabled(&op->kp))
525 arch_disarm_kprobe(&op->kp);
526 if (kprobe_unused(&op->kp)) {
527 /*
528 * Remove unused probes from hash list. After waiting
529 * for synchronization, these probes are reclaimed.
530 * (reclaiming is done by do_free_cleaned_kprobes.)
531 */
532 hlist_del_rcu(&op->kp.hlist);
533 } else
534 list_del_init(&op->list);
467 } 535 }
468 mutex_unlock(&text_mutex); 536 mutex_unlock(&text_mutex);
469 put_online_cpus(); 537 put_online_cpus();
470end: 538}
539
540/* Reclaim all kprobes on the free_list */
541static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
542{
543 struct optimized_kprobe *op, *tmp;
544
545 list_for_each_entry_safe(op, tmp, free_list, list) {
546 BUG_ON(!kprobe_unused(&op->kp));
547 list_del_init(&op->list);
548 free_aggr_kprobe(&op->kp);
549 }
550}
551
552/* Start optimizer after OPTIMIZE_DELAY passed */
553static __kprobes void kick_kprobe_optimizer(void)
554{
555 if (!delayed_work_pending(&optimizing_work))
556 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
557}
558
559/* Kprobe jump optimizer */
560static __kprobes void kprobe_optimizer(struct work_struct *work)
561{
562 LIST_HEAD(free_list);
563
564 /* Lock modules while optimizing kprobes */
565 mutex_lock(&module_mutex);
566 mutex_lock(&kprobe_mutex);
567
568 /*
569 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
 570	 * kprobes before waiting for the quiescence period.
571 */
572 do_unoptimize_kprobes(&free_list);
573
574 /*
 575	 * Step 2: Wait for the quiescence period to ensure all running
 576	 * interrupts are done. Because an optprobe may modify multiple
 577	 * instructions, there is a chance that the Nth instruction is
 578	 * interrupted. In that case, a running interrupt can return into
 579	 * the 2nd-Nth byte of the jump instruction. This wait avoids that.
580 */
581 synchronize_sched();
582
 583	/* Step 3: Optimize kprobes after the quiescence period */
584 do_optimize_kprobes();
585
 587	/* Step 4: Free cleaned kprobes after the quiescence period */
587 do_free_cleaned_kprobes(&free_list);
588
471 mutex_unlock(&kprobe_mutex); 589 mutex_unlock(&kprobe_mutex);
472 mutex_unlock(&module_mutex); 590 mutex_unlock(&module_mutex);
591
592 /* Step 5: Kick optimizer again if needed */
593 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
594 kick_kprobe_optimizer();
595 else
596 /* Wake up all waiters */
597 complete_all(&optimizer_comp);
598}
599
 600/* Wait for optimization and unoptimization to complete */
601static __kprobes void wait_for_kprobe_optimizer(void)
602{
603 if (delayed_work_pending(&optimizing_work))
604 wait_for_completion(&optimizer_comp);
473} 605}
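
Taken together, kick_kprobe_optimizer(), kprobe_optimizer() and wait_for_kprobe_optimizer() implement a generic delayed-batching pattern: producers queue items and kick a delayed worker, while waiters block on a completion that the worker signals only once both queues have drained. A stripped-down sketch of that pattern (illustrative identifiers, not code from this patch):

    static LIST_HEAD(pending);
    static DECLARE_COMPLETION(drained);
    static void batch_fn(struct work_struct *work);
    static DECLARE_DELAYED_WORK(batch_work, batch_fn);

    static void kick_batcher(void)
    {
            if (!delayed_work_pending(&batch_work))
                    schedule_delayed_work(&batch_work, 5 * HZ);
    }

    static void batch_fn(struct work_struct *work)
    {
            /* ... drain 'pending' under the subsystem mutex ... */
            if (!list_empty(&pending))
                    kick_batcher();         /* more work arrived meanwhile */
            else
                    complete_all(&drained); /* release wait_for_completion() callers */
    }
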
474 606
475/* Optimize kprobe if p is ready to be optimized */ 607/* Optimize kprobe if p is ready to be optimized */
@@ -495,42 +627,99 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
495 /* Check if it is already optimized. */ 627 /* Check if it is already optimized. */
496 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) 628 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
497 return; 629 return;
498
499 op->kp.flags |= KPROBE_FLAG_OPTIMIZED; 630 op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
500 list_add(&op->list, &optimizing_list); 631
501 if (!delayed_work_pending(&optimizing_work)) 632 if (!list_empty(&op->list))
502 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); 633 /* This is under unoptimizing. Just dequeue the probe */
634 list_del_init(&op->list);
635 else {
636 list_add(&op->list, &optimizing_list);
637 kick_kprobe_optimizer();
638 }
639}
640
 641/* Shortcut for directly unoptimizing a kprobe */
642static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
643{
644 get_online_cpus();
645 arch_unoptimize_kprobe(op);
646 put_online_cpus();
647 if (kprobe_disabled(&op->kp))
648 arch_disarm_kprobe(&op->kp);
503} 649}
504 650
505/* Unoptimize a kprobe if p is optimized */ 651/* Unoptimize a kprobe if p is optimized */
506static __kprobes void unoptimize_kprobe(struct kprobe *p) 652static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
507{ 653{
508 struct optimized_kprobe *op; 654 struct optimized_kprobe *op;
509 655
510 if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) { 656 if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
 511		op = container_of(p, struct optimized_kprobe, kp);	 657		return; /* This is neither an optprobe nor optimized */
512 if (!list_empty(&op->list)) 658
513 /* Dequeue from the optimization queue */ 659 op = container_of(p, struct optimized_kprobe, kp);
660 if (!kprobe_optimized(p)) {
661 /* Unoptimized or unoptimizing case */
662 if (force && !list_empty(&op->list)) {
663 /*
 664			 * Only if this kprobe is queued for unoptimizing and
 665			 * force is set, forcibly unoptimize it. (There is no
 666			 * need to unoptimize an already unoptimized kprobe :)
667 */
514 list_del_init(&op->list); 668 list_del_init(&op->list);
515 else 669 force_unoptimize_kprobe(op);
516 /* Replace jump with break */ 670 }
517 arch_unoptimize_kprobe(op); 671 return;
518 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 672 }
673
674 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
675 if (!list_empty(&op->list)) {
676 /* Dequeue from the optimization queue */
677 list_del_init(&op->list);
678 return;
679 }
680 /* Optimized kprobe case */
681 if (force)
682 /* Forcibly update the code: this is a special case */
683 force_unoptimize_kprobe(op);
684 else {
685 list_add(&op->list, &unoptimizing_list);
686 kick_kprobe_optimizer();
519 } 687 }
520} 688}
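
The four states unoptimize_kprobe() distinguishes are easier to see in tabular form (a summary derived from the code above, not part of the patch):

    /*
     * state of p                  force == false              force == true
     * --------------------------  --------------------------  -----------------------
     * not optimized, not queued   nothing to do               nothing to do
     * queued for unoptimizing     leave it queued             dequeue and patch now
     * queued for optimizing       dequeue only                dequeue only
     * optimized, not queued       queue on unoptimizing_list  patch text immediately
     *                             and kick the optimizer
     */
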
521 689
 690/* Cancel unoptimizing so the kprobe can be reused */
691static void reuse_unused_kprobe(struct kprobe *ap)
692{
693 struct optimized_kprobe *op;
694
695 BUG_ON(!kprobe_unused(ap));
696 /*
 697	 * An unused kprobe MUST be in the middle of delayed unoptimizing
 698	 * (meaning there is still a relative jump in place) and disabled.
699 */
700 op = container_of(ap, struct optimized_kprobe, kp);
701 if (unlikely(list_empty(&op->list)))
702 printk(KERN_WARNING "Warning: found a stray unused "
703 "aggrprobe@%p\n", ap->addr);
704 /* Enable the probe again */
705 ap->flags &= ~KPROBE_FLAG_DISABLED;
706 /* Optimize it again (remove from op->list) */
707 BUG_ON(!kprobe_optready(ap));
708 optimize_kprobe(ap);
709}
710
522/* Remove optimized instructions */ 711/* Remove optimized instructions */
523static void __kprobes kill_optimized_kprobe(struct kprobe *p) 712static void __kprobes kill_optimized_kprobe(struct kprobe *p)
524{ 713{
525 struct optimized_kprobe *op; 714 struct optimized_kprobe *op;
526 715
527 op = container_of(p, struct optimized_kprobe, kp); 716 op = container_of(p, struct optimized_kprobe, kp);
528 if (!list_empty(&op->list)) { 717 if (!list_empty(&op->list))
529 /* Dequeue from the optimization queue */ 718 /* Dequeue from the (un)optimization queue */
530 list_del_init(&op->list); 719 list_del_init(&op->list);
531 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 720
532 } 721 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
533 /* Don't unoptimize, because the target code will be freed. */ 722 /* Don't touch the code, because it is already freed. */
534 arch_remove_optimized_kprobe(op); 723 arch_remove_optimized_kprobe(op);
535} 724}
536 725
@@ -543,16 +732,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
543 arch_prepare_optimized_kprobe(op); 732 arch_prepare_optimized_kprobe(op);
544} 733}
545 734
546/* Free optimized instructions and optimized_kprobe */
547static __kprobes void free_aggr_kprobe(struct kprobe *p)
548{
549 struct optimized_kprobe *op;
550
551 op = container_of(p, struct optimized_kprobe, kp);
552 arch_remove_optimized_kprobe(op);
553 kfree(op);
554}
555
556/* Allocate new optimized_kprobe and try to prepare optimized instructions */ 735/* Allocate new optimized_kprobe and try to prepare optimized instructions */
557static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 736static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
558{ 737{
@@ -587,7 +766,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
587 op = container_of(ap, struct optimized_kprobe, kp); 766 op = container_of(ap, struct optimized_kprobe, kp);
588 if (!arch_prepared_optinsn(&op->optinsn)) { 767 if (!arch_prepared_optinsn(&op->optinsn)) {
589 /* If failed to setup optimizing, fallback to kprobe */ 768 /* If failed to setup optimizing, fallback to kprobe */
590 free_aggr_kprobe(ap); 769 arch_remove_optimized_kprobe(op);
770 kfree(op);
591 return; 771 return;
592 } 772 }
593 773
@@ -631,21 +811,16 @@ static void __kprobes unoptimize_all_kprobes(void)
631 return; 811 return;
632 812
633 kprobes_allow_optimization = false; 813 kprobes_allow_optimization = false;
634 printk(KERN_INFO "Kprobes globally unoptimized\n");
635 get_online_cpus(); /* For avoiding text_mutex deadlock */
636 mutex_lock(&text_mutex);
637 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 814 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
638 head = &kprobe_table[i]; 815 head = &kprobe_table[i];
639 hlist_for_each_entry_rcu(p, node, head, hlist) { 816 hlist_for_each_entry_rcu(p, node, head, hlist) {
640 if (!kprobe_disabled(p)) 817 if (!kprobe_disabled(p))
641 unoptimize_kprobe(p); 818 unoptimize_kprobe(p, false);
642 } 819 }
643 } 820 }
644 821 /* Wait for unoptimizing completion */
645 mutex_unlock(&text_mutex); 822 wait_for_kprobe_optimizer();
646 put_online_cpus(); 823 printk(KERN_INFO "Kprobes globally unoptimized\n");
647 /* Allow all currently running kprobes to complete */
648 synchronize_sched();
649} 824}
650 825
651int sysctl_kprobes_optimization; 826int sysctl_kprobes_optimization;
@@ -669,44 +844,60 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
669} 844}
670#endif /* CONFIG_SYSCTL */ 845#endif /* CONFIG_SYSCTL */
671 846
847/* Put a breakpoint for a probe. Must be called with text_mutex locked */
672static void __kprobes __arm_kprobe(struct kprobe *p) 848static void __kprobes __arm_kprobe(struct kprobe *p)
673{ 849{
674 struct kprobe *old_p; 850 struct kprobe *_p;
675 851
676 /* Check collision with other optimized kprobes */ 852 /* Check collision with other optimized kprobes */
677 old_p = get_optimized_kprobe((unsigned long)p->addr); 853 _p = get_optimized_kprobe((unsigned long)p->addr);
678 if (unlikely(old_p)) 854 if (unlikely(_p))
679 unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */ 855 /* Fallback to unoptimized kprobe */
856 unoptimize_kprobe(_p, true);
680 857
681 arch_arm_kprobe(p); 858 arch_arm_kprobe(p);
682 optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ 859 optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */
683} 860}
684 861
685static void __kprobes __disarm_kprobe(struct kprobe *p) 862/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
863static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
686{ 864{
687 struct kprobe *old_p; 865 struct kprobe *_p;
688 866
689 unoptimize_kprobe(p); /* Try to unoptimize */ 867 unoptimize_kprobe(p, false); /* Try to unoptimize */
690 arch_disarm_kprobe(p);
691 868
692 /* If another kprobe was blocked, optimize it. */ 869 if (!kprobe_queued(p)) {
693 old_p = get_optimized_kprobe((unsigned long)p->addr); 870 arch_disarm_kprobe(p);
694 if (unlikely(old_p)) 871 /* If another kprobe was blocked, optimize it. */
695 optimize_kprobe(old_p); 872 _p = get_optimized_kprobe((unsigned long)p->addr);
873 if (unlikely(_p) && reopt)
874 optimize_kprobe(_p);
875 }
 876	/* TODO: reoptimize others after unoptimizing this probe */
696} 877}
697 878
698#else /* !CONFIG_OPTPROBES */ 879#else /* !CONFIG_OPTPROBES */
699 880
700#define optimize_kprobe(p) do {} while (0) 881#define optimize_kprobe(p) do {} while (0)
701#define unoptimize_kprobe(p) do {} while (0) 882#define unoptimize_kprobe(p, f) do {} while (0)
702#define kill_optimized_kprobe(p) do {} while (0) 883#define kill_optimized_kprobe(p) do {} while (0)
703#define prepare_optimized_kprobe(p) do {} while (0) 884#define prepare_optimized_kprobe(p) do {} while (0)
704#define try_to_optimize_kprobe(p) do {} while (0) 885#define try_to_optimize_kprobe(p) do {} while (0)
705#define __arm_kprobe(p) arch_arm_kprobe(p) 886#define __arm_kprobe(p) arch_arm_kprobe(p)
706#define __disarm_kprobe(p) arch_disarm_kprobe(p) 887#define __disarm_kprobe(p, o) arch_disarm_kprobe(p)
888#define kprobe_disarmed(p) kprobe_disabled(p)
889#define wait_for_kprobe_optimizer() do {} while (0)
890
 891/* With !CONFIG_OPTPROBES there should be no unused kprobes to reuse */
892static void reuse_unused_kprobe(struct kprobe *ap)
893{
894 printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
895 BUG_ON(kprobe_unused(ap));
896}
707 897
708static __kprobes void free_aggr_kprobe(struct kprobe *p) 898static __kprobes void free_aggr_kprobe(struct kprobe *p)
709{ 899{
900 arch_remove_kprobe(p);
710 kfree(p); 901 kfree(p);
711} 902}
712 903
@@ -732,11 +923,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
732/* Disarm a kprobe with text_mutex */ 923/* Disarm a kprobe with text_mutex */
733static void __kprobes disarm_kprobe(struct kprobe *kp) 924static void __kprobes disarm_kprobe(struct kprobe *kp)
734{ 925{
735 get_online_cpus(); /* For avoiding text_mutex deadlock */ 926 /* Ditto */
736 mutex_lock(&text_mutex); 927 mutex_lock(&text_mutex);
737 __disarm_kprobe(kp); 928 __disarm_kprobe(kp, true);
738 mutex_unlock(&text_mutex); 929 mutex_unlock(&text_mutex);
739 put_online_cpus();
740} 930}
741 931
742/* 932/*
@@ -942,7 +1132,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
942 BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); 1132 BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
943 1133
944 if (p->break_handler || p->post_handler) 1134 if (p->break_handler || p->post_handler)
945 unoptimize_kprobe(ap); /* Fall back to normal kprobe */ 1135 unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */
946 1136
947 if (p->break_handler) { 1137 if (p->break_handler) {
948 if (ap->break_handler) 1138 if (ap->break_handler)
@@ -993,19 +1183,21 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
993 * This is the second or subsequent kprobe at the address - handle 1183 * This is the second or subsequent kprobe at the address - handle
994 * the intricacies 1184 * the intricacies
995 */ 1185 */
996static int __kprobes register_aggr_kprobe(struct kprobe *old_p, 1186static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
997 struct kprobe *p) 1187 struct kprobe *p)
998{ 1188{
999 int ret = 0; 1189 int ret = 0;
1000 struct kprobe *ap = old_p; 1190 struct kprobe *ap = orig_p;
1001 1191
1002 if (!kprobe_aggrprobe(old_p)) { 1192 if (!kprobe_aggrprobe(orig_p)) {
1003 /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */ 1193 /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
1004 ap = alloc_aggr_kprobe(old_p); 1194 ap = alloc_aggr_kprobe(orig_p);
1005 if (!ap) 1195 if (!ap)
1006 return -ENOMEM; 1196 return -ENOMEM;
1007 init_aggr_kprobe(ap, old_p); 1197 init_aggr_kprobe(ap, orig_p);
1008 } 1198 } else if (kprobe_unused(ap))
1199 /* This probe is going to die. Rescue it */
1200 reuse_unused_kprobe(ap);
1009 1201
1010 if (kprobe_gone(ap)) { 1202 if (kprobe_gone(ap)) {
1011 /* 1203 /*
@@ -1039,23 +1231,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
1039 return add_new_kprobe(ap, p); 1231 return add_new_kprobe(ap, p);
1040} 1232}
1041 1233
1042/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
1043static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
1044{
1045 struct kprobe *kp;
1046
1047 list_for_each_entry_rcu(kp, &p->list, list) {
1048 if (!kprobe_disabled(kp))
1049 /*
1050 * There is an active probe on the list.
1051 * We can't disable aggr_kprobe.
1052 */
1053 return 0;
1054 }
1055 p->flags |= KPROBE_FLAG_DISABLED;
1056 return 1;
1057}
1058
1059static int __kprobes in_kprobes_functions(unsigned long addr) 1234static int __kprobes in_kprobes_functions(unsigned long addr)
1060{ 1235{
1061 struct kprobe_blackpoint *kb; 1236 struct kprobe_blackpoint *kb;
@@ -1098,34 +1273,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
1098/* Check passed kprobe is valid and return kprobe in kprobe_table. */ 1273/* Check passed kprobe is valid and return kprobe in kprobe_table. */
1099static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) 1274static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
1100{ 1275{
1101 struct kprobe *old_p, *list_p; 1276 struct kprobe *ap, *list_p;
1102 1277
1103 old_p = get_kprobe(p->addr); 1278 ap = get_kprobe(p->addr);
1104 if (unlikely(!old_p)) 1279 if (unlikely(!ap))
1105 return NULL; 1280 return NULL;
1106 1281
1107 if (p != old_p) { 1282 if (p != ap) {
1108 list_for_each_entry_rcu(list_p, &old_p->list, list) 1283 list_for_each_entry_rcu(list_p, &ap->list, list)
1109 if (list_p == p) 1284 if (list_p == p)
1110 /* kprobe p is a valid probe */ 1285 /* kprobe p is a valid probe */
1111 goto valid; 1286 goto valid;
1112 return NULL; 1287 return NULL;
1113 } 1288 }
1114valid: 1289valid:
1115 return old_p; 1290 return ap;
1116} 1291}
1117 1292
1118/* Return error if the kprobe is being re-registered */ 1293/* Return error if the kprobe is being re-registered */
1119static inline int check_kprobe_rereg(struct kprobe *p) 1294static inline int check_kprobe_rereg(struct kprobe *p)
1120{ 1295{
1121 int ret = 0; 1296 int ret = 0;
1122 struct kprobe *old_p;
1123 1297
1124 mutex_lock(&kprobe_mutex); 1298 mutex_lock(&kprobe_mutex);
1125 old_p = __get_valid_kprobe(p); 1299 if (__get_valid_kprobe(p))
1126 if (old_p)
1127 ret = -EINVAL; 1300 ret = -EINVAL;
1128 mutex_unlock(&kprobe_mutex); 1301 mutex_unlock(&kprobe_mutex);
1302
1129 return ret; 1303 return ret;
1130} 1304}
1131 1305
@@ -1229,67 +1403,121 @@ fail_with_jump_label:
1229} 1403}
1230EXPORT_SYMBOL_GPL(register_kprobe); 1404EXPORT_SYMBOL_GPL(register_kprobe);
1231 1405
1406/* Check if all probes on the aggrprobe are disabled */
1407static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
1408{
1409 struct kprobe *kp;
1410
1411 list_for_each_entry_rcu(kp, &ap->list, list)
1412 if (!kprobe_disabled(kp))
1413 /*
1414 * There is an active probe on the list.
1415 * We can't disable this ap.
1416 */
1417 return 0;
1418
1419 return 1;
1420}
1421
1422/* Disable one kprobe: must be called with kprobe_mutex held */
1423static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
1424{
1425 struct kprobe *orig_p;
1426
1427 /* Get an original kprobe for return */
1428 orig_p = __get_valid_kprobe(p);
1429 if (unlikely(orig_p == NULL))
1430 return NULL;
1431
1432 if (!kprobe_disabled(p)) {
1433 /* Disable probe if it is a child probe */
1434 if (p != orig_p)
1435 p->flags |= KPROBE_FLAG_DISABLED;
1436
1437 /* Try to disarm and disable this/parent probe */
1438 if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1439 disarm_kprobe(orig_p);
1440 orig_p->flags |= KPROBE_FLAG_DISABLED;
1441 }
1442 }
1443
1444 return orig_p;
1445}
1446
1232/* 1447/*
1233 * Unregister a kprobe without a scheduler synchronization. 1448 * Unregister a kprobe without a scheduler synchronization.
1234 */ 1449 */
1235static int __kprobes __unregister_kprobe_top(struct kprobe *p) 1450static int __kprobes __unregister_kprobe_top(struct kprobe *p)
1236{ 1451{
1237 struct kprobe *old_p, *list_p; 1452 struct kprobe *ap, *list_p;
1238 1453
1239 old_p = __get_valid_kprobe(p); 1454 /* Disable kprobe. This will disarm it if needed. */
1240 if (old_p == NULL) 1455 ap = __disable_kprobe(p);
1456 if (ap == NULL)
1241 return -EINVAL; 1457 return -EINVAL;
1242 1458
1243 if (old_p == p || 1459 if (ap == p)
1244 (kprobe_aggrprobe(old_p) &&
1245 list_is_singular(&old_p->list))) {
1246 /* 1460 /*
1247		 * Only probe on the hash list. Disarm only if kprobes are	1461		 * This probe is an independent (and non-optimized) kprobe
1248 * enabled and not gone - otherwise, the breakpoint would 1462 * (not an aggrprobe). Remove from the hash list.
1249 * already have been removed. We save on flushing icache.
1250 */ 1463 */
1251 if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) 1464 goto disarmed;
1252 disarm_kprobe(old_p); 1465
1253 hlist_del_rcu(&old_p->hlist); 1466 /* Following process expects this probe is an aggrprobe */
1254 } else { 1467 WARN_ON(!kprobe_aggrprobe(ap));
1468
1469 if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
1470 /*
1471 * !disarmed could be happen if the probe is under delayed
1472 * unoptimizing.
1473 */
1474 goto disarmed;
1475 else {
1476		/* If the disabling probe has special handlers, update the aggrprobe */
1255 if (p->break_handler && !kprobe_gone(p)) 1477 if (p->break_handler && !kprobe_gone(p))
1256 old_p->break_handler = NULL; 1478 ap->break_handler = NULL;
1257 if (p->post_handler && !kprobe_gone(p)) { 1479 if (p->post_handler && !kprobe_gone(p)) {
1258 list_for_each_entry_rcu(list_p, &old_p->list, list) { 1480 list_for_each_entry_rcu(list_p, &ap->list, list) {
1259 if ((list_p != p) && (list_p->post_handler)) 1481 if ((list_p != p) && (list_p->post_handler))
1260 goto noclean; 1482 goto noclean;
1261 } 1483 }
1262 old_p->post_handler = NULL; 1484 ap->post_handler = NULL;
1263 } 1485 }
1264noclean: 1486noclean:
1487 /*
1488 * Remove from the aggrprobe: this path will do nothing in
1489 * __unregister_kprobe_bottom().
1490 */
1265 list_del_rcu(&p->list); 1491 list_del_rcu(&p->list);
1266 if (!kprobe_disabled(old_p)) { 1492 if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
1267 try_to_disable_aggr_kprobe(old_p); 1493 /*
1268 if (!kprobes_all_disarmed) { 1494 * Try to optimize this probe again, because post
1269 if (kprobe_disabled(old_p)) 1495 * handler may have been changed.
1270 disarm_kprobe(old_p); 1496 */
1271 else 1497 optimize_kprobe(ap);
1272 /* Try to optimize this probe again */
1273 optimize_kprobe(old_p);
1274 }
1275 }
1276 } 1498 }
1277 return 0; 1499 return 0;
1500
1501disarmed:
1502 BUG_ON(!kprobe_disarmed(ap));
1503 hlist_del_rcu(&ap->hlist);
1504 return 0;
1278} 1505}
1279 1506
1280static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) 1507static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
1281{ 1508{
1282 struct kprobe *old_p; 1509 struct kprobe *ap;
1283 1510
1284 if (list_empty(&p->list)) 1511 if (list_empty(&p->list))
1512 /* This is an independent kprobe */
1285 arch_remove_kprobe(p); 1513 arch_remove_kprobe(p);
1286 else if (list_is_singular(&p->list)) { 1514 else if (list_is_singular(&p->list)) {
1287 /* "p" is the last child of an aggr_kprobe */ 1515 /* This is the last child of an aggrprobe */
1288 old_p = list_entry(p->list.next, struct kprobe, list); 1516 ap = list_entry(p->list.next, struct kprobe, list);
1289 list_del(&p->list); 1517 list_del(&p->list);
1290 arch_remove_kprobe(old_p); 1518 free_aggr_kprobe(ap);
1291 free_aggr_kprobe(old_p);
1292 } 1519 }
1520 /* Otherwise, do nothing. */
1293} 1521}
1294 1522
1295int __kprobes register_kprobes(struct kprobe **kps, int num) 1523int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1607,29 +1835,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
1607int __kprobes disable_kprobe(struct kprobe *kp) 1835int __kprobes disable_kprobe(struct kprobe *kp)
1608{ 1836{
1609 int ret = 0; 1837 int ret = 0;
1610 struct kprobe *p;
1611 1838
1612 mutex_lock(&kprobe_mutex); 1839 mutex_lock(&kprobe_mutex);
1613 1840
1614 /* Check whether specified probe is valid. */ 1841 /* Disable this kprobe */
1615 p = __get_valid_kprobe(kp); 1842 if (__disable_kprobe(kp) == NULL)
1616 if (unlikely(p == NULL)) {
1617 ret = -EINVAL; 1843 ret = -EINVAL;
1618 goto out;
1619 }
1620 1844
1621 /* If the probe is already disabled (or gone), just return */
1622 if (kprobe_disabled(kp))
1623 goto out;
1624
1625 kp->flags |= KPROBE_FLAG_DISABLED;
1626 if (p != kp)
1627 /* When kp != p, p is always enabled. */
1628 try_to_disable_aggr_kprobe(p);
1629
1630 if (!kprobes_all_disarmed && kprobe_disabled(p))
1631 disarm_kprobe(p);
1632out:
1633 mutex_unlock(&kprobe_mutex); 1845 mutex_unlock(&kprobe_mutex);
1634 return ret; 1846 return ret;
1635} 1847}
@@ -1927,36 +2139,27 @@ static void __kprobes disarm_all_kprobes(void)
1927 mutex_lock(&kprobe_mutex); 2139 mutex_lock(&kprobe_mutex);
1928 2140
1929 /* If kprobes are already disarmed, just return */ 2141 /* If kprobes are already disarmed, just return */
1930 if (kprobes_all_disarmed) 2142 if (kprobes_all_disarmed) {
1931 goto already_disabled; 2143 mutex_unlock(&kprobe_mutex);
2144 return;
2145 }
1932 2146
1933 kprobes_all_disarmed = true; 2147 kprobes_all_disarmed = true;
1934 printk(KERN_INFO "Kprobes globally disabled\n"); 2148 printk(KERN_INFO "Kprobes globally disabled\n");
1935 2149
1936 /*
1937 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
1938 * because disarming may also unoptimize kprobes.
1939 */
1940 get_online_cpus();
1941 mutex_lock(&text_mutex); 2150 mutex_lock(&text_mutex);
1942 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2151 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1943 head = &kprobe_table[i]; 2152 head = &kprobe_table[i];
1944 hlist_for_each_entry_rcu(p, node, head, hlist) { 2153 hlist_for_each_entry_rcu(p, node, head, hlist) {
1945 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) 2154 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
1946 __disarm_kprobe(p); 2155 __disarm_kprobe(p, false);
1947 } 2156 }
1948 } 2157 }
1949
1950 mutex_unlock(&text_mutex); 2158 mutex_unlock(&text_mutex);
1951 put_online_cpus();
1952 mutex_unlock(&kprobe_mutex); 2159 mutex_unlock(&kprobe_mutex);
1953 /* Allow all currently running kprobes to complete */
1954 synchronize_sched();
1955 return;
1956 2160
1957already_disabled: 2161 /* Wait for disarming all kprobes by optimizer */
1958 mutex_unlock(&kprobe_mutex); 2162 wait_for_kprobe_optimizer();
1959 return;
1960} 2163}
1961 2164
1962/* 2165/*
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2dc3786349d1..5355cfd44a3f 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
148 wait_for_completion(&create.done); 148 wait_for_completion(&create.done);
149 149
150 if (!IS_ERR(create.result)) { 150 if (!IS_ERR(create.result)) {
151 struct sched_param param = { .sched_priority = 0 }; 151 static struct sched_param param = { .sched_priority = 0 };
152 va_list args; 152 va_list args;
153 153
154 va_start(args, namefmt); 154 va_start(args, namefmt);
@@ -265,6 +265,17 @@ int kthreadd(void *unused)
265 return 0; 265 return 0;
266} 266}
267 267
268void __init_kthread_worker(struct kthread_worker *worker,
269 const char *name,
270 struct lock_class_key *key)
271{
272 spin_lock_init(&worker->lock);
273 lockdep_set_class_and_name(&worker->lock, key, name);
274 INIT_LIST_HEAD(&worker->work_list);
275 worker->task = NULL;
276}
277EXPORT_SYMBOL_GPL(__init_kthread_worker);
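
__init_kthread_worker() is the lockdep-aware backend for the init_kthread_worker()/DEFINE_KTHREAD_WORKER helpers in <linux/kthread.h>. A hedged usage sketch, assuming the queue_kthread_work()/kthread_worker_fn() API of this kernel generation (the my_* identifiers are illustrative):

    /* assumes <linux/kthread.h> and <linux/err.h> */
    static struct kthread_worker my_worker;
    static struct kthread_work my_work;

    static void my_work_fn(struct kthread_work *work)
    {
            pr_info("running in a dedicated kthread\n");
    }

    static int __init my_setup(void)
    {
            struct task_struct *task;

            init_kthread_worker(&my_worker);
            task = kthread_run(kthread_worker_fn, &my_worker, "my_worker");
            if (IS_ERR(task))
                    return PTR_ERR(task);

            init_kthread_work(&my_work, my_work_fn);
            queue_kthread_work(&my_worker, &my_work);
            return 0;
    }
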
278
268/** 279/**
269 * kthread_worker_fn - kthread function to process kthread_worker 280 * kthread_worker_fn - kthread function to process kthread_worker
270 * @worker_ptr: pointer to initialized kthread_worker 281 * @worker_ptr: pointer to initialized kthread_worker
diff --git a/kernel/module.c b/kernel/module.c
index d190664f25ff..34e00b708fad 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -56,6 +56,7 @@
56#include <linux/percpu.h> 56#include <linux/percpu.h>
57#include <linux/kmemleak.h> 57#include <linux/kmemleak.h>
58#include <linux/jump_label.h> 58#include <linux/jump_label.h>
59#include <linux/pfn.h>
59 60
60#define CREATE_TRACE_POINTS 61#define CREATE_TRACE_POINTS
61#include <trace/events/module.h> 62#include <trace/events/module.h>
@@ -70,6 +71,26 @@
70#define ARCH_SHF_SMALL 0 71#define ARCH_SHF_SMALL 0
71#endif 72#endif
72 73
74/*
75 * Modules' sections will be aligned on page boundaries
76 * to ensure complete separation of code and data, but
77 * only when CONFIG_DEBUG_SET_MODULE_RONX=y
78 */
79#ifdef CONFIG_DEBUG_SET_MODULE_RONX
80# define debug_align(X) ALIGN(X, PAGE_SIZE)
81#else
82# define debug_align(X) (X)
83#endif
84
85/*
86 * Given BASE and SIZE this macro calculates the number of pages the
 87 * memory region occupies
88 */
89#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \
90 (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \
91 PFN_DOWN((unsigned long)BASE) + 1) \
92 : (0UL))
93
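
A worked example of the macro, assuming PAGE_SIZE == 4096 so that PFN_DOWN(x) == x >> 12:

    MOD_NUMBER_OF_PAGES(0x1000, 4096) /* (0x1FFF>>12) - (0x1000>>12) + 1 = 1 page  */
    MOD_NUMBER_OF_PAGES(0x1800, 4096) /* (0x27FF>>12) - (0x1800>>12) + 1 = 2 pages */
    MOD_NUMBER_OF_PAGES(0x1800, 0)    /* the SIZE > 0 guard fails:        0 pages  */

A region straddling a page boundary counts both pages, which is what the RO/NX attribute setters below require.
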
73/* If this is set, the section belongs in the init part of the module */ 94/* If this is set, the section belongs in the init part of the module */
74#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) 95#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
75 96
@@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod)
1542 return 0; 1563 return 0;
1543} 1564}
1544 1565
1566#ifdef CONFIG_DEBUG_SET_MODULE_RONX
1567/*
1568 * LKM RO/NX protection: protect the module's text/ro-data
1569 * from modification, and its data from execution.
1570 */
1571void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages))
1572{
1573 unsigned long begin_pfn = PFN_DOWN((unsigned long)start);
1574 unsigned long end_pfn = PFN_DOWN((unsigned long)end);
1575
1576 if (end_pfn > begin_pfn)
1577 set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
1578}
1579
1580static void set_section_ro_nx(void *base,
1581 unsigned long text_size,
1582 unsigned long ro_size,
1583 unsigned long total_size)
1584{
1585 /* begin and end PFNs of the current subsection */
1586 unsigned long begin_pfn;
1587 unsigned long end_pfn;
1588
1589 /*
1590 * Set RO for module text and RO-data:
1591 * - Always protect first page.
1592 * - Do not protect last partial page.
1593 */
1594 if (ro_size > 0)
1595 set_page_attributes(base, base + ro_size, set_memory_ro);
1596
1597 /*
1598 * Set NX permissions for module data:
1599 * - Do not protect first partial page.
1600 * - Always protect last page.
1601 */
1602 if (total_size > text_size) {
1603 begin_pfn = PFN_UP((unsigned long)base + text_size);
1604 end_pfn = PFN_UP((unsigned long)base + total_size);
1605 if (end_pfn > begin_pfn)
1606 set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
1607 }
1608}
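
To see the two rounding rules at work, take a hypothetical 4 KiB-page layout with base = 0x10000, text_size = 0x1800, ro_size = 0x3000 and total_size = 0x5000:

    /* RO: pfns PFN_DOWN(0x10000) .. PFN_DOWN(0x13000)-1 = 0x10..0x12 become
     *     read-only; a trailing partial RO page would be skipped (none here,
     *     since ro_size is page aligned).
     * NX: pfns PFN_UP(0x11800) .. PFN_UP(0x15000)-1 = 0x12..0x14 become
     *     non-executable; the partial page shared with text (pfn 0x11)
     *     keeps its execute permission.
     */

Rounding down for RO and up for NX means a page shared by two sections is never over-restricted: text on a shared page stays executable, and data on a shared page stays writable.
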
1609
1610/* Setting memory back to RW+NX before releasing it */
1611void unset_section_ro_nx(struct module *mod, void *module_region)
1612{
1613 unsigned long total_pages;
1614
1615 if (mod->module_core == module_region) {
1616 /* Set core as NX+RW */
1617 total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size);
1618 set_memory_nx((unsigned long)mod->module_core, total_pages);
1619 set_memory_rw((unsigned long)mod->module_core, total_pages);
1620
1621 } else if (mod->module_init == module_region) {
1622 /* Set init as NX+RW */
1623 total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size);
1624 set_memory_nx((unsigned long)mod->module_init, total_pages);
1625 set_memory_rw((unsigned long)mod->module_init, total_pages);
1626 }
1627}
1628
1629/* Iterate through all modules and set each module's text as RW */
1630void set_all_modules_text_rw(void)
1631{
1632 struct module *mod;
1633
1634 mutex_lock(&module_mutex);
1635 list_for_each_entry_rcu(mod, &modules, list) {
1636 if ((mod->module_core) && (mod->core_text_size)) {
1637 set_page_attributes(mod->module_core,
1638 mod->module_core + mod->core_text_size,
1639 set_memory_rw);
1640 }
1641 if ((mod->module_init) && (mod->init_text_size)) {
1642 set_page_attributes(mod->module_init,
1643 mod->module_init + mod->init_text_size,
1644 set_memory_rw);
1645 }
1646 }
1647 mutex_unlock(&module_mutex);
1648}
1649
1650/* Iterate through all modules and set each module's text as RO */
1651void set_all_modules_text_ro(void)
1652{
1653 struct module *mod;
1654
1655 mutex_lock(&module_mutex);
1656 list_for_each_entry_rcu(mod, &modules, list) {
1657 if ((mod->module_core) && (mod->core_text_size)) {
1658 set_page_attributes(mod->module_core,
1659 mod->module_core + mod->core_text_size,
1660 set_memory_ro);
1661 }
1662 if ((mod->module_init) && (mod->init_text_size)) {
1663 set_page_attributes(mod->module_init,
1664 mod->module_init + mod->init_text_size,
1665 set_memory_ro);
1666 }
1667 }
1668 mutex_unlock(&module_mutex);
1669}
1670#else
1671static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { }
1672static inline void unset_section_ro_nx(struct module *mod, void *module_region) { }
1673#endif
1674
1545/* Free a module, remove from lists, etc. */ 1675/* Free a module, remove from lists, etc. */
1546static void free_module(struct module *mod) 1676static void free_module(struct module *mod)
1547{ 1677{
@@ -1566,6 +1696,7 @@ static void free_module(struct module *mod)
1566 destroy_params(mod->kp, mod->num_kp); 1696 destroy_params(mod->kp, mod->num_kp);
1567 1697
1568 /* This may be NULL, but that's OK */ 1698 /* This may be NULL, but that's OK */
1699 unset_section_ro_nx(mod, mod->module_init);
1569 module_free(mod, mod->module_init); 1700 module_free(mod, mod->module_init);
1570 kfree(mod->args); 1701 kfree(mod->args);
1571 percpu_modfree(mod); 1702 percpu_modfree(mod);
@@ -1574,6 +1705,7 @@ static void free_module(struct module *mod)
1574 lockdep_free_key_range(mod->module_core, mod->core_size); 1705 lockdep_free_key_range(mod->module_core, mod->core_size);
1575 1706
1576 /* Finally, free the core (containing the module structure) */ 1707 /* Finally, free the core (containing the module structure) */
1708 unset_section_ro_nx(mod, mod->module_core);
1577 module_free(mod, mod->module_core); 1709 module_free(mod, mod->module_core);
1578 1710
1579#ifdef CONFIG_MPU 1711#ifdef CONFIG_MPU
@@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info)
1777 s->sh_entsize = get_offset(mod, &mod->core_size, s, i); 1909 s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
1778 DEBUGP("\t%s\n", name); 1910 DEBUGP("\t%s\n", name);
1779 } 1911 }
1780 if (m == 0) 1912 switch (m) {
1913 case 0: /* executable */
1914 mod->core_size = debug_align(mod->core_size);
1781 mod->core_text_size = mod->core_size; 1915 mod->core_text_size = mod->core_size;
1916 break;
1917 case 1: /* RO: text and ro-data */
1918 mod->core_size = debug_align(mod->core_size);
1919 mod->core_ro_size = mod->core_size;
1920 break;
1921 case 3: /* whole core */
1922 mod->core_size = debug_align(mod->core_size);
1923 break;
1924 }
1782 } 1925 }
1783 1926
1784 DEBUGP("Init section allocation order:\n"); 1927 DEBUGP("Init section allocation order:\n");
@@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info)
1796 | INIT_OFFSET_MASK); 1939 | INIT_OFFSET_MASK);
1797 DEBUGP("\t%s\n", sname); 1940 DEBUGP("\t%s\n", sname);
1798 } 1941 }
1799 if (m == 0) 1942 switch (m) {
1943 case 0: /* executable */
1944 mod->init_size = debug_align(mod->init_size);
1800 mod->init_text_size = mod->init_size; 1945 mod->init_text_size = mod->init_size;
1946 break;
1947 case 1: /* RO: text and ro-data */
1948 mod->init_size = debug_align(mod->init_size);
1949 mod->init_ro_size = mod->init_size;
1950 break;
1951 case 3: /* whole init */
1952 mod->init_size = debug_align(mod->init_size);
1953 break;
1954 }
1801 } 1955 }
1802} 1956}
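
With CONFIG_DEBUG_SET_MODULE_RONX=y, debug_align() pads each of the three boundaries to a page. An illustrative core layout (hypothetical sizes, 4 KiB pages) with 5 KiB of text, 2 KiB of rodata and 3 KiB of other data:

    0x0000-0x13ff  text    -> core_text_size = 0x2000 (page aligned)
    0x2000-0x27ff  rodata  -> core_ro_size   = 0x3000 (page aligned)
    0x3000-0x3bff  data    -> core_size      = 0x4000 (page aligned)

so set_section_ro_nx() can later flip page permissions without splitting any section across a protection boundary.
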
1803 1957
@@ -2722,6 +2876,18 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2722 blocking_notifier_call_chain(&module_notify_list, 2876 blocking_notifier_call_chain(&module_notify_list,
2723 MODULE_STATE_COMING, mod); 2877 MODULE_STATE_COMING, mod);
2724 2878
2879 /* Set RO and NX regions for core */
2880 set_section_ro_nx(mod->module_core,
2881 mod->core_text_size,
2882 mod->core_ro_size,
2883 mod->core_size);
2884
2885 /* Set RO and NX regions for init */
2886 set_section_ro_nx(mod->module_init,
2887 mod->init_text_size,
2888 mod->init_ro_size,
2889 mod->init_size);
2890
2725 do_mod_ctors(mod); 2891 do_mod_ctors(mod);
2726 /* Start the module */ 2892 /* Start the module */
2727 if (mod->init != NULL) 2893 if (mod->init != NULL)
@@ -2765,6 +2931,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2765 mod->symtab = mod->core_symtab; 2931 mod->symtab = mod->core_symtab;
2766 mod->strtab = mod->core_strtab; 2932 mod->strtab = mod->core_strtab;
2767#endif 2933#endif
2934 unset_section_ro_nx(mod, mod->module_init);
2768 module_free(mod, mod->module_init); 2935 module_free(mod, mod->module_init);
2769 mod->module_init = NULL; 2936 mod->module_init = NULL;
2770 mod->init_size = 0; 2937 mod->init_size = 0;
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 200407c1502f..a5889fb28ecf 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
199 * memory barriers as we'll eventually observe the right 199 * memory barriers as we'll eventually observe the right
200 * values at the cost of a few extra spins. 200 * values at the cost of a few extra spins.
201 */ 201 */
202 cpu_relax(); 202 arch_mutex_cpu_relax();
203 } 203 }
204#endif 204#endif
205 spin_lock_mutex(&lock->wait_lock, flags); 205 spin_lock_mutex(&lock->wait_lock, flags);
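
arch_mutex_cpu_relax() exists so that s390, where cpu_relax() yields to the hypervisor and is comparatively expensive, can substitute a cheaper relax in this owner-spin loop. The generic fallback added alongside this hunk presumably reduces to:

    #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
    #define arch_mutex_cpu_relax()  cpu_relax() /* default: behaviour unchanged */
    #endif
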
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2870feee81dd..11847bf1e8cc 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -13,6 +13,7 @@
13#include <linux/mm.h> 13#include <linux/mm.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15#include <linux/smp.h> 15#include <linux/smp.h>
16#include <linux/idr.h>
16#include <linux/file.h> 17#include <linux/file.h>
17#include <linux/poll.h> 18#include <linux/poll.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
@@ -21,7 +22,9 @@
21#include <linux/dcache.h> 22#include <linux/dcache.h>
22#include <linux/percpu.h> 23#include <linux/percpu.h>
23#include <linux/ptrace.h> 24#include <linux/ptrace.h>
25#include <linux/reboot.h>
24#include <linux/vmstat.h> 26#include <linux/vmstat.h>
27#include <linux/device.h>
25#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
26#include <linux/hardirq.h> 29#include <linux/hardirq.h>
27#include <linux/rculist.h> 30#include <linux/rculist.h>
@@ -133,6 +136,28 @@ static void unclone_ctx(struct perf_event_context *ctx)
133 } 136 }
134} 137}
135 138
139static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
140{
141 /*
142 * only top level events have the pid namespace they were created in
143 */
144 if (event->parent)
145 event = event->parent;
146
147 return task_tgid_nr_ns(p, event->ns);
148}
149
150static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
151{
152 /*
153 * only top level events have the pid namespace they were created in
154 */
155 if (event->parent)
156 event = event->parent;
157
158 return task_pid_nr_ns(p, event->ns);
159}
160
136/* 161/*
137 * If we inherit events we want to return the parent event id 162 * If we inherit events we want to return the parent event id
138 * to userspace. 163 * to userspace.
@@ -312,9 +337,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
312 ctx->nr_stat++; 337 ctx->nr_stat++;
313} 338}
314 339
340/*
341 * Called at perf_event creation and when events are attached/detached from a
342 * group.
343 */
344static void perf_event__read_size(struct perf_event *event)
345{
346 int entry = sizeof(u64); /* value */
347 int size = 0;
348 int nr = 1;
349
350 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
351 size += sizeof(u64);
352
353 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
354 size += sizeof(u64);
355
356 if (event->attr.read_format & PERF_FORMAT_ID)
357 entry += sizeof(u64);
358
359 if (event->attr.read_format & PERF_FORMAT_GROUP) {
360 nr += event->group_leader->nr_siblings;
361 size += sizeof(u64);
362 }
363
364 size += entry * nr;
365 event->read_size = size;
366}
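
Worked example: with read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_ID | PERF_FORMAT_GROUP on a leader with two siblings:

    entry = 8 (value) + 8 (id)        = 16 bytes per counter
    size  = 8 (time_enabled) + 8 (nr) = 16 bytes of header
    nr    = 1 leader + 2 siblings     =  3 counters
    read_size = 16 + 16 * 3           = 64 bytes

which is why read_size has to be recomputed whenever siblings are attached or detached (see perf_group_attach/detach below).
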
367
368static void perf_event__header_size(struct perf_event *event)
369{
370 struct perf_sample_data *data;
371 u64 sample_type = event->attr.sample_type;
372 u16 size = 0;
373
374 perf_event__read_size(event);
375
376 if (sample_type & PERF_SAMPLE_IP)
377 size += sizeof(data->ip);
378
379 if (sample_type & PERF_SAMPLE_ADDR)
380 size += sizeof(data->addr);
381
382 if (sample_type & PERF_SAMPLE_PERIOD)
383 size += sizeof(data->period);
384
385 if (sample_type & PERF_SAMPLE_READ)
386 size += event->read_size;
387
388 event->header_size = size;
389}
390
391static void perf_event__id_header_size(struct perf_event *event)
392{
393 struct perf_sample_data *data;
394 u64 sample_type = event->attr.sample_type;
395 u16 size = 0;
396
397 if (sample_type & PERF_SAMPLE_TID)
398 size += sizeof(data->tid_entry);
399
400 if (sample_type & PERF_SAMPLE_TIME)
401 size += sizeof(data->time);
402
403 if (sample_type & PERF_SAMPLE_ID)
404 size += sizeof(data->id);
405
406 if (sample_type & PERF_SAMPLE_STREAM_ID)
407 size += sizeof(data->stream_id);
408
409 if (sample_type & PERF_SAMPLE_CPU)
410 size += sizeof(data->cpu_entry);
411
412 event->id_header_size = size;
413}
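
Worked example, assuming the usual perf_sample_data layout in which tid_entry and cpu_entry are each a pair of u32s: sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU yields

    tid_entry (u32 pid + u32 tid)      = 8 bytes
    time      (u64)                    = 8 bytes
    cpu_entry (u32 cpu + u32 reserved) = 8 bytes
    id_header_size                     = 24 bytes

appended to every record once sample_id_all is set.
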
414
315static void perf_group_attach(struct perf_event *event) 415static void perf_group_attach(struct perf_event *event)
316{ 416{
317 struct perf_event *group_leader = event->group_leader; 417 struct perf_event *group_leader = event->group_leader, *pos;
318 418
319 /* 419 /*
320 * We can have double attach due to group movement in perf_event_open. 420 * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +433,11 @@ static void perf_group_attach(struct perf_event *event)
333 433
334 list_add_tail(&event->group_entry, &group_leader->sibling_list); 434 list_add_tail(&event->group_entry, &group_leader->sibling_list);
335 group_leader->nr_siblings++; 435 group_leader->nr_siblings++;
436
437 perf_event__header_size(group_leader);
438
439 list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
440 perf_event__header_size(pos);
336} 441}
337 442
338/* 443/*
@@ -391,7 +496,7 @@ static void perf_group_detach(struct perf_event *event)
391 if (event->group_leader != event) { 496 if (event->group_leader != event) {
392 list_del_init(&event->group_entry); 497 list_del_init(&event->group_entry);
393 event->group_leader->nr_siblings--; 498 event->group_leader->nr_siblings--;
394 return; 499 goto out;
395 } 500 }
396 501
397 if (!list_empty(&event->group_entry)) 502 if (!list_empty(&event->group_entry))
@@ -410,6 +515,12 @@ static void perf_group_detach(struct perf_event *event)
410 /* Inherit group flags from the previous leader */ 515 /* Inherit group flags from the previous leader */
411 sibling->group_flags = event->group_flags; 516 sibling->group_flags = event->group_flags;
412 } 517 }
518
519out:
520 perf_event__header_size(event->group_leader);
521
522 list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
523 perf_event__header_size(tmp);
413} 524}
414 525
415static inline int 526static inline int
@@ -1073,7 +1184,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
1073 /* 1184 /*
1074 * not supported on inherited events 1185 * not supported on inherited events
1075 */ 1186 */
1076 if (event->attr.inherit) 1187 if (event->attr.inherit || !is_sampling_event(event))
1077 return -EINVAL; 1188 return -EINVAL;
1078 1189
1079 atomic_add(refresh, &event->event_limit); 1190 atomic_add(refresh, &event->event_limit);
@@ -2289,31 +2400,6 @@ static int perf_release(struct inode *inode, struct file *file)
2289 return perf_event_release_kernel(event); 2400 return perf_event_release_kernel(event);
2290} 2401}
2291 2402
2292static int perf_event_read_size(struct perf_event *event)
2293{
2294 int entry = sizeof(u64); /* value */
2295 int size = 0;
2296 int nr = 1;
2297
2298 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2299 size += sizeof(u64);
2300
2301 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2302 size += sizeof(u64);
2303
2304 if (event->attr.read_format & PERF_FORMAT_ID)
2305 entry += sizeof(u64);
2306
2307 if (event->attr.read_format & PERF_FORMAT_GROUP) {
2308 nr += event->group_leader->nr_siblings;
2309 size += sizeof(u64);
2310 }
2311
2312 size += entry * nr;
2313
2314 return size;
2315}
2316
2317u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) 2403u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
2318{ 2404{
2319 struct perf_event *child; 2405 struct perf_event *child;
@@ -2428,7 +2514,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
2428 if (event->state == PERF_EVENT_STATE_ERROR) 2514 if (event->state == PERF_EVENT_STATE_ERROR)
2429 return 0; 2515 return 0;
2430 2516
2431 if (count < perf_event_read_size(event)) 2517 if (count < event->read_size)
2432 return -ENOSPC; 2518 return -ENOSPC;
2433 2519
2434 WARN_ON_ONCE(event->ctx->parent_ctx); 2520 WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2600,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
2514 int ret = 0; 2600 int ret = 0;
2515 u64 value; 2601 u64 value;
2516 2602
2517 if (!event->attr.sample_period) 2603 if (!is_sampling_event(event))
2518 return -EINVAL; 2604 return -EINVAL;
2519 2605
2520 if (copy_from_user(&value, arg, sizeof(value))) 2606 if (copy_from_user(&value, arg, sizeof(value)))
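
Several hunks in this file replace open-coded sample_period tests with is_sampling_event(). Judging from the call sites, the helper introduced to <linux/perf_event.h> by this series amounts to:

    static inline bool is_sampling_event(struct perf_event *event)
    {
            return event->attr.sample_period != 0;
    }

i.e. pure counting events (sample_period == 0) are filtered out before any sampling-only logic runs.
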
@@ -3305,6 +3391,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
3305 } while (len); 3391 } while (len);
3306} 3392}
3307 3393
3394static void __perf_event_header__init_id(struct perf_event_header *header,
3395 struct perf_sample_data *data,
3396 struct perf_event *event)
3397{
3398 u64 sample_type = event->attr.sample_type;
3399
3400 data->type = sample_type;
3401 header->size += event->id_header_size;
3402
3403 if (sample_type & PERF_SAMPLE_TID) {
3404 /* namespace issues */
3405 data->tid_entry.pid = perf_event_pid(event, current);
3406 data->tid_entry.tid = perf_event_tid(event, current);
3407 }
3408
3409 if (sample_type & PERF_SAMPLE_TIME)
3410 data->time = perf_clock();
3411
3412 if (sample_type & PERF_SAMPLE_ID)
3413 data->id = primary_event_id(event);
3414
3415 if (sample_type & PERF_SAMPLE_STREAM_ID)
3416 data->stream_id = event->id;
3417
3418 if (sample_type & PERF_SAMPLE_CPU) {
3419 data->cpu_entry.cpu = raw_smp_processor_id();
3420 data->cpu_entry.reserved = 0;
3421 }
3422}
3423
3424static void perf_event_header__init_id(struct perf_event_header *header,
3425 struct perf_sample_data *data,
3426 struct perf_event *event)
3427{
3428 if (event->attr.sample_id_all)
3429 __perf_event_header__init_id(header, data, event);
3430}
3431
3432static void __perf_event__output_id_sample(struct perf_output_handle *handle,
3433 struct perf_sample_data *data)
3434{
3435 u64 sample_type = data->type;
3436
3437 if (sample_type & PERF_SAMPLE_TID)
3438 perf_output_put(handle, data->tid_entry);
3439
3440 if (sample_type & PERF_SAMPLE_TIME)
3441 perf_output_put(handle, data->time);
3442
3443 if (sample_type & PERF_SAMPLE_ID)
3444 perf_output_put(handle, data->id);
3445
3446 if (sample_type & PERF_SAMPLE_STREAM_ID)
3447 perf_output_put(handle, data->stream_id);
3448
3449 if (sample_type & PERF_SAMPLE_CPU)
3450 perf_output_put(handle, data->cpu_entry);
3451}
3452
3453static void perf_event__output_id_sample(struct perf_event *event,
3454 struct perf_output_handle *handle,
3455 struct perf_sample_data *sample)
3456{
3457 if (event->attr.sample_id_all)
3458 __perf_event__output_id_sample(handle, sample);
3459}
3460
3308int perf_output_begin(struct perf_output_handle *handle, 3461int perf_output_begin(struct perf_output_handle *handle,
3309 struct perf_event *event, unsigned int size, 3462 struct perf_event *event, unsigned int size,
3310 int nmi, int sample) 3463 int nmi, int sample)
@@ -3312,6 +3465,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3312 struct perf_buffer *buffer; 3465 struct perf_buffer *buffer;
3313 unsigned long tail, offset, head; 3466 unsigned long tail, offset, head;
3314 int have_lost; 3467 int have_lost;
3468 struct perf_sample_data sample_data;
3315 struct { 3469 struct {
3316 struct perf_event_header header; 3470 struct perf_event_header header;
3317 u64 id; 3471 u64 id;
@@ -3338,8 +3492,12 @@ int perf_output_begin(struct perf_output_handle *handle,
3338 goto out; 3492 goto out;
3339 3493
3340 have_lost = local_read(&buffer->lost); 3494 have_lost = local_read(&buffer->lost);
3341 if (have_lost) 3495 if (have_lost) {
3342 size += sizeof(lost_event); 3496 lost_event.header.size = sizeof(lost_event);
3497 perf_event_header__init_id(&lost_event.header, &sample_data,
3498 event);
3499 size += lost_event.header.size;
3500 }
3343 3501
3344 perf_output_get_handle(handle); 3502 perf_output_get_handle(handle);
3345 3503
@@ -3370,11 +3528,11 @@ int perf_output_begin(struct perf_output_handle *handle,
3370 if (have_lost) { 3528 if (have_lost) {
3371 lost_event.header.type = PERF_RECORD_LOST; 3529 lost_event.header.type = PERF_RECORD_LOST;
3372 lost_event.header.misc = 0; 3530 lost_event.header.misc = 0;
3373 lost_event.header.size = sizeof(lost_event);
3374 lost_event.id = event->id; 3531 lost_event.id = event->id;
3375 lost_event.lost = local_xchg(&buffer->lost, 0); 3532 lost_event.lost = local_xchg(&buffer->lost, 0);
3376 3533
3377 perf_output_put(handle, lost_event); 3534 perf_output_put(handle, lost_event);
3535 perf_event__output_id_sample(event, handle, &sample_data);
3378 } 3536 }
3379 3537
3380 return 0; 3538 return 0;
@@ -3407,28 +3565,6 @@ void perf_output_end(struct perf_output_handle *handle)
3407 rcu_read_unlock(); 3565 rcu_read_unlock();
3408} 3566}
3409 3567
3410static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
3411{
3412 /*
3413 * only top level events have the pid namespace they were created in
3414 */
3415 if (event->parent)
3416 event = event->parent;
3417
3418 return task_tgid_nr_ns(p, event->ns);
3419}
3420
3421static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
3422{
3423 /*
3424 * only top level events have the pid namespace they were created in
3425 */
3426 if (event->parent)
3427 event = event->parent;
3428
3429 return task_pid_nr_ns(p, event->ns);
3430}
3431
3432static void perf_output_read_one(struct perf_output_handle *handle, 3568static void perf_output_read_one(struct perf_output_handle *handle,
3433 struct perf_event *event, 3569 struct perf_event *event,
3434 u64 enabled, u64 running) 3570 u64 enabled, u64 running)
@@ -3603,61 +3739,16 @@ void perf_prepare_sample(struct perf_event_header *header,
3603{ 3739{
3604 u64 sample_type = event->attr.sample_type; 3740 u64 sample_type = event->attr.sample_type;
3605 3741
3606 data->type = sample_type;
3607
3608 header->type = PERF_RECORD_SAMPLE; 3742 header->type = PERF_RECORD_SAMPLE;
3609 header->size = sizeof(*header); 3743 header->size = sizeof(*header) + event->header_size;
3610 3744
3611 header->misc = 0; 3745 header->misc = 0;
3612 header->misc |= perf_misc_flags(regs); 3746 header->misc |= perf_misc_flags(regs);
3613 3747
3614 if (sample_type & PERF_SAMPLE_IP) { 3748 __perf_event_header__init_id(header, data, event);
3615 data->ip = perf_instruction_pointer(regs);
3616
3617 header->size += sizeof(data->ip);
3618 }
3619
3620 if (sample_type & PERF_SAMPLE_TID) {
3621 /* namespace issues */
3622 data->tid_entry.pid = perf_event_pid(event, current);
3623 data->tid_entry.tid = perf_event_tid(event, current);
3624
3625 header->size += sizeof(data->tid_entry);
3626 }
3627
3628 if (sample_type & PERF_SAMPLE_TIME) {
3629 data->time = perf_clock();
3630
3631 header->size += sizeof(data->time);
3632 }
3633
3634 if (sample_type & PERF_SAMPLE_ADDR)
3635 header->size += sizeof(data->addr);
3636
3637 if (sample_type & PERF_SAMPLE_ID) {
3638 data->id = primary_event_id(event);
3639
3640 header->size += sizeof(data->id);
3641 }
3642
3643 if (sample_type & PERF_SAMPLE_STREAM_ID) {
3644 data->stream_id = event->id;
3645
3646 header->size += sizeof(data->stream_id);
3647 }
3648
3649 if (sample_type & PERF_SAMPLE_CPU) {
3650 data->cpu_entry.cpu = raw_smp_processor_id();
3651 data->cpu_entry.reserved = 0;
3652
3653 header->size += sizeof(data->cpu_entry);
3654 }
3655
3656 if (sample_type & PERF_SAMPLE_PERIOD)
3657 header->size += sizeof(data->period);
3658 3749
3659 if (sample_type & PERF_SAMPLE_READ) 3750 if (sample_type & PERF_SAMPLE_IP)
3660 header->size += perf_event_read_size(event); 3751 data->ip = perf_instruction_pointer(regs);
3661 3752
3662 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 3753 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
3663 int size = 1; 3754 int size = 1;
@@ -3722,23 +3813,26 @@ perf_event_read_event(struct perf_event *event,
3722 struct task_struct *task) 3813 struct task_struct *task)
3723{ 3814{
3724 struct perf_output_handle handle; 3815 struct perf_output_handle handle;
3816 struct perf_sample_data sample;
3725 struct perf_read_event read_event = { 3817 struct perf_read_event read_event = {
3726 .header = { 3818 .header = {
3727 .type = PERF_RECORD_READ, 3819 .type = PERF_RECORD_READ,
3728 .misc = 0, 3820 .misc = 0,
3729 .size = sizeof(read_event) + perf_event_read_size(event), 3821 .size = sizeof(read_event) + event->read_size,
3730 }, 3822 },
3731 .pid = perf_event_pid(event, task), 3823 .pid = perf_event_pid(event, task),
3732 .tid = perf_event_tid(event, task), 3824 .tid = perf_event_tid(event, task),
3733 }; 3825 };
3734 int ret; 3826 int ret;
3735 3827
3828 perf_event_header__init_id(&read_event.header, &sample, event);
3736 ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); 3829 ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
3737 if (ret) 3830 if (ret)
3738 return; 3831 return;
3739 3832
3740 perf_output_put(&handle, read_event); 3833 perf_output_put(&handle, read_event);
3741 perf_output_read(&handle, event); 3834 perf_output_read(&handle, event);
3835 perf_event__output_id_sample(event, &handle, &sample);
3742 3836
3743 perf_output_end(&handle); 3837 perf_output_end(&handle);
3744} 3838}
@@ -3768,14 +3862,16 @@ static void perf_event_task_output(struct perf_event *event,
3768 struct perf_task_event *task_event) 3862 struct perf_task_event *task_event)
3769{ 3863{
3770 struct perf_output_handle handle; 3864 struct perf_output_handle handle;
3865 struct perf_sample_data sample;
3771 struct task_struct *task = task_event->task; 3866 struct task_struct *task = task_event->task;
3772 int size, ret; 3867 int ret, size = task_event->event_id.header.size;
3773 3868
3774 size = task_event->event_id.header.size; 3869 perf_event_header__init_id(&task_event->event_id.header, &sample, event);
3775 ret = perf_output_begin(&handle, event, size, 0, 0);
3776 3870
3871 ret = perf_output_begin(&handle, event,
3872 task_event->event_id.header.size, 0, 0);
3777 if (ret) 3873 if (ret)
3778 return; 3874 goto out;
3779 3875
3780 task_event->event_id.pid = perf_event_pid(event, task); 3876 task_event->event_id.pid = perf_event_pid(event, task);
3781 task_event->event_id.ppid = perf_event_pid(event, current); 3877 task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3785,7 +3881,11 @@ static void perf_event_task_output(struct perf_event *event,
3785 3881
3786 perf_output_put(&handle, task_event->event_id); 3882 perf_output_put(&handle, task_event->event_id);
3787 3883
3884 perf_event__output_id_sample(event, &handle, &sample);
3885
3788 perf_output_end(&handle); 3886 perf_output_end(&handle);
3887out:
3888 task_event->event_id.header.size = size;
3789} 3889}
3790 3890
3791static int perf_event_task_match(struct perf_event *event) 3891static int perf_event_task_match(struct perf_event *event)
@@ -3900,11 +4000,16 @@ static void perf_event_comm_output(struct perf_event *event,
3900 struct perf_comm_event *comm_event) 4000 struct perf_comm_event *comm_event)
3901{ 4001{
3902 struct perf_output_handle handle; 4002 struct perf_output_handle handle;
4003 struct perf_sample_data sample;
3903 int size = comm_event->event_id.header.size; 4004 int size = comm_event->event_id.header.size;
3904 int ret = perf_output_begin(&handle, event, size, 0, 0); 4005 int ret;
4006
4007 perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
4008 ret = perf_output_begin(&handle, event,
4009 comm_event->event_id.header.size, 0, 0);
3905 4010
3906 if (ret) 4011 if (ret)
3907 return; 4012 goto out;
3908 4013
3909 comm_event->event_id.pid = perf_event_pid(event, comm_event->task); 4014 comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
3910 comm_event->event_id.tid = perf_event_tid(event, comm_event->task); 4015 comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3912,7 +4017,12 @@ static void perf_event_comm_output(struct perf_event *event,
3912 perf_output_put(&handle, comm_event->event_id); 4017 perf_output_put(&handle, comm_event->event_id);
3913 perf_output_copy(&handle, comm_event->comm, 4018 perf_output_copy(&handle, comm_event->comm,
3914 comm_event->comm_size); 4019 comm_event->comm_size);
4020
4021 perf_event__output_id_sample(event, &handle, &sample);
4022
3915 perf_output_end(&handle); 4023 perf_output_end(&handle);
4024out:
4025 comm_event->event_id.header.size = size;
3916} 4026}
3917 4027
3918static int perf_event_comm_match(struct perf_event *event) 4028static int perf_event_comm_match(struct perf_event *event)
@@ -3957,7 +4067,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
3957 comm_event->comm_size = size; 4067 comm_event->comm_size = size;
3958 4068
3959 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; 4069 comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
3960
3961 rcu_read_lock(); 4070 rcu_read_lock();
3962 list_for_each_entry_rcu(pmu, &pmus, entry) { 4071 list_for_each_entry_rcu(pmu, &pmus, entry) {
3963 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4072 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
@@ -4038,11 +4147,15 @@ static void perf_event_mmap_output(struct perf_event *event,
4038 struct perf_mmap_event *mmap_event) 4147 struct perf_mmap_event *mmap_event)
4039{ 4148{
4040 struct perf_output_handle handle; 4149 struct perf_output_handle handle;
4150 struct perf_sample_data sample;
4041 int size = mmap_event->event_id.header.size; 4151 int size = mmap_event->event_id.header.size;
4042 int ret = perf_output_begin(&handle, event, size, 0, 0); 4152 int ret;
4043 4153
4154 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
4155 ret = perf_output_begin(&handle, event,
4156 mmap_event->event_id.header.size, 0, 0);
4044 if (ret) 4157 if (ret)
4045 return; 4158 goto out;
4046 4159
4047 mmap_event->event_id.pid = perf_event_pid(event, current); 4160 mmap_event->event_id.pid = perf_event_pid(event, current);
4048 mmap_event->event_id.tid = perf_event_tid(event, current); 4161 mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4050,7 +4163,12 @@ static void perf_event_mmap_output(struct perf_event *event,
4050 perf_output_put(&handle, mmap_event->event_id); 4163 perf_output_put(&handle, mmap_event->event_id);
4051 perf_output_copy(&handle, mmap_event->file_name, 4164 perf_output_copy(&handle, mmap_event->file_name,
4052 mmap_event->file_size); 4165 mmap_event->file_size);
4166
4167 perf_event__output_id_sample(event, &handle, &sample);
4168
4053 perf_output_end(&handle); 4169 perf_output_end(&handle);
4170out:
4171 mmap_event->event_id.header.size = size;
4054} 4172}
4055 4173
4056static int perf_event_mmap_match(struct perf_event *event, 4174static int perf_event_mmap_match(struct perf_event *event,
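
The task, comm, and mmap emitters above all acquire the same shape: cache the pristine header size, let perf_event_header__init_id() grow the header to cover the optional PERF_SAMPLE_* identifier fields, write the record body, append the identifier sample, and restore the cached size so the preformatted event template can be reused. A condensed sketch of that shared flow, using the helpers introduced by this patch; struct generic_event is a hypothetical stand-in for the three record types:

	static void perf_event_generic_output(struct perf_event *event,
					      struct generic_event *ge)
	{
		struct perf_output_handle handle;
		struct perf_sample_data sample;
		int size = ge->event_id.header.size;	/* cache pristine size */
		int ret;

		/* May enlarge header.size to make room for the ID fields. */
		perf_event_header__init_id(&ge->event_id.header, &sample, event);
		ret = perf_output_begin(&handle, event,
					ge->event_id.header.size, 0, 0);
		if (ret)
			goto out;

		perf_output_put(&handle, ge->event_id);	/* fixed-size body */
		perf_event__output_id_sample(event, &handle, &sample);
		perf_output_end(&handle);
	out:
		ge->event_id.header.size = size;	/* restore for reuse */
	}
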
@@ -4205,6 +4323,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
4205static void perf_log_throttle(struct perf_event *event, int enable) 4323static void perf_log_throttle(struct perf_event *event, int enable)
4206{ 4324{
4207 struct perf_output_handle handle; 4325 struct perf_output_handle handle;
4326 struct perf_sample_data sample;
4208 int ret; 4327 int ret;
4209 4328
4210 struct { 4329 struct {
@@ -4226,11 +4345,15 @@ static void perf_log_throttle(struct perf_event *event, int enable)
4226 if (enable) 4345 if (enable)
4227 throttle_event.header.type = PERF_RECORD_UNTHROTTLE; 4346 throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
4228 4347
4229 ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0); 4348 perf_event_header__init_id(&throttle_event.header, &sample, event);
4349
4350 ret = perf_output_begin(&handle, event,
4351 throttle_event.header.size, 1, 0);
4230 if (ret) 4352 if (ret)
4231 return; 4353 return;
4232 4354
4233 perf_output_put(&handle, throttle_event); 4355 perf_output_put(&handle, throttle_event);
4356 perf_event__output_id_sample(event, &handle, &sample);
4234 perf_output_end(&handle); 4357 perf_output_end(&handle);
4235} 4358}
4236 4359
@@ -4246,6 +4369,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
4246 struct hw_perf_event *hwc = &event->hw; 4369 struct hw_perf_event *hwc = &event->hw;
4247 int ret = 0; 4370 int ret = 0;
4248 4371
4372 /*
4373 * Non-sampling counters might still use the PMI to fold short
4374 * hardware counters, ignore those.
4375 */
4376 if (unlikely(!is_sampling_event(event)))
4377 return 0;
4378
4249 if (!throttle) { 4379 if (!throttle) {
4250 hwc->interrupts++; 4380 hwc->interrupts++;
4251 } else { 4381 } else {
@@ -4391,7 +4521,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
4391 if (!regs) 4521 if (!regs)
4392 return; 4522 return;
4393 4523
4394 if (!hwc->sample_period) 4524 if (!is_sampling_event(event))
4395 return; 4525 return;
4396 4526
4397 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4527 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4554,7 +4684,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
4554 struct hw_perf_event *hwc = &event->hw; 4684 struct hw_perf_event *hwc = &event->hw;
4555 struct hlist_head *head; 4685 struct hlist_head *head;
4556 4686
4557 if (hwc->sample_period) { 4687 if (is_sampling_event(event)) {
4558 hwc->last_period = hwc->sample_period; 4688 hwc->last_period = hwc->sample_period;
4559 perf_swevent_set_period(event); 4689 perf_swevent_set_period(event);
4560 } 4690 }
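
The hwc->sample_period tests swapped out above, together with the new early return in __perf_event_overflow(), all ask the same question: was this event opened with a sampling period at all? is_sampling_event() itself is not shown in this diff; it is presumably a one-line inline in <linux/perf_event.h> along these lines, mirroring the attr value from which hwc->sample_period is seeded:

	/* Presumed definition (not part of this hunk): an event samples
	 * iff it was opened with a non-zero sample_period; frequency mode
	 * (attr.freq) also works by programming sample_period, so this
	 * single test covers both styles. */
	static inline int is_sampling_event(struct perf_event *event)
	{
		return event->attr.sample_period != 0;
	}
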
@@ -4811,15 +4941,6 @@ static int perf_tp_event_init(struct perf_event *event)
4811 if (event->attr.type != PERF_TYPE_TRACEPOINT) 4941 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4812 return -ENOENT; 4942 return -ENOENT;
4813 4943
4814 /*
4815 * Raw tracepoint data is a severe data leak, only allow root to
4816 * have these.
4817 */
4818 if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
4819 perf_paranoid_tracepoint_raw() &&
4820 !capable(CAP_SYS_ADMIN))
4821 return -EPERM;
4822
4823 err = perf_trace_init(event); 4944 err = perf_trace_init(event);
4824 if (err) 4945 if (err)
4825 return err; 4946 return err;
@@ -4842,7 +4963,7 @@ static struct pmu perf_tracepoint = {
4842 4963
4843static inline void perf_tp_register(void) 4964static inline void perf_tp_register(void)
4844{ 4965{
4845 perf_pmu_register(&perf_tracepoint); 4966 perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
4846} 4967}
4847 4968
4848static int perf_event_set_filter(struct perf_event *event, void __user *arg) 4969static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4932,31 +5053,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4932static void perf_swevent_start_hrtimer(struct perf_event *event) 5053static void perf_swevent_start_hrtimer(struct perf_event *event)
4933{ 5054{
4934 struct hw_perf_event *hwc = &event->hw; 5055 struct hw_perf_event *hwc = &event->hw;
5056 s64 period;
5057
5058 if (!is_sampling_event(event))
5059 return;
4935 5060
4936 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 5061 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4937 hwc->hrtimer.function = perf_swevent_hrtimer; 5062 hwc->hrtimer.function = perf_swevent_hrtimer;
4938 if (hwc->sample_period) {
4939 s64 period = local64_read(&hwc->period_left);
4940 5063
4941 if (period) { 5064 period = local64_read(&hwc->period_left);
4942 if (period < 0) 5065 if (period) {
4943 period = 10000; 5066 if (period < 0)
5067 period = 10000;
4944 5068
4945 local64_set(&hwc->period_left, 0); 5069 local64_set(&hwc->period_left, 0);
4946 } else { 5070 } else {
4947 period = max_t(u64, 10000, hwc->sample_period); 5071 period = max_t(u64, 10000, hwc->sample_period);
4948 } 5072 }
4949 __hrtimer_start_range_ns(&hwc->hrtimer, 5073 __hrtimer_start_range_ns(&hwc->hrtimer,
4950 ns_to_ktime(period), 0, 5074 ns_to_ktime(period), 0,
4951 HRTIMER_MODE_REL_PINNED, 0); 5075 HRTIMER_MODE_REL_PINNED, 0);
4952 }
4953} 5076}
4954 5077
4955static void perf_swevent_cancel_hrtimer(struct perf_event *event) 5078static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4956{ 5079{
4957 struct hw_perf_event *hwc = &event->hw; 5080 struct hw_perf_event *hwc = &event->hw;
4958 5081
4959 if (hwc->sample_period) { 5082 if (is_sampling_event(event)) {
4960 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); 5083 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4961 local64_set(&hwc->period_left, ktime_to_ns(remaining)); 5084 local64_set(&hwc->period_left, ktime_to_ns(remaining));
4962 5085
@@ -5184,8 +5307,61 @@ static void free_pmu_context(struct pmu *pmu)
5184out: 5307out:
5185 mutex_unlock(&pmus_lock); 5308 mutex_unlock(&pmus_lock);
5186} 5309}
5310static struct idr pmu_idr;
5311
5312static ssize_t
5313type_show(struct device *dev, struct device_attribute *attr, char *page)
5314{
5315 struct pmu *pmu = dev_get_drvdata(dev);
5316
5317 return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
5318}
5319
5320static struct device_attribute pmu_dev_attrs[] = {
5321 __ATTR_RO(type),
5322 __ATTR_NULL,
5323};
5324
5325static int pmu_bus_running;
5326static struct bus_type pmu_bus = {
5327 .name = "event_source",
5328 .dev_attrs = pmu_dev_attrs,
5329};
5330
5331static void pmu_dev_release(struct device *dev)
5332{
5333 kfree(dev);
5334}
5335
5336static int pmu_dev_alloc(struct pmu *pmu)
5337{
5338 int ret = -ENOMEM;
5339
5340 pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
5341 if (!pmu->dev)
5342 goto out;
5343
5344 device_initialize(pmu->dev);
5345 ret = dev_set_name(pmu->dev, "%s", pmu->name);
5346 if (ret)
5347 goto free_dev;
5348
5349 dev_set_drvdata(pmu->dev, pmu);
5350 pmu->dev->bus = &pmu_bus;
5351 pmu->dev->release = pmu_dev_release;
5352 ret = device_add(pmu->dev);
5353 if (ret)
5354 goto free_dev;
5355
5356out:
5357 return ret;
5358
5359free_dev:
5360 put_device(pmu->dev);
5361 goto out;
5362}
5187 5363
5188int perf_pmu_register(struct pmu *pmu) 5364int perf_pmu_register(struct pmu *pmu, char *name, int type)
5189{ 5365{
5190 int cpu, ret; 5366 int cpu, ret;
5191 5367
@@ -5195,13 +5371,38 @@ int perf_pmu_register(struct pmu *pmu)
5195 if (!pmu->pmu_disable_count) 5371 if (!pmu->pmu_disable_count)
5196 goto unlock; 5372 goto unlock;
5197 5373
5374 pmu->type = -1;
5375 if (!name)
5376 goto skip_type;
5377 pmu->name = name;
5378
5379 if (type < 0) {
5380 int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
5381 if (!err)
5382 goto free_pdc;
5383
5384 err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
5385 if (err) {
5386 ret = err;
5387 goto free_pdc;
5388 }
5389 }
5390 pmu->type = type;
5391
5392 if (pmu_bus_running) {
5393 ret = pmu_dev_alloc(pmu);
5394 if (ret)
5395 goto free_idr;
5396 }
5397
5398skip_type:
5198 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); 5399 pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
5199 if (pmu->pmu_cpu_context) 5400 if (pmu->pmu_cpu_context)
5200 goto got_cpu_context; 5401 goto got_cpu_context;
5201 5402
5202 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); 5403 pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
5203 if (!pmu->pmu_cpu_context) 5404 if (!pmu->pmu_cpu_context)
5204 goto free_pdc; 5405 goto free_dev;
5205 5406
5206 for_each_possible_cpu(cpu) { 5407 for_each_possible_cpu(cpu) {
5207 struct perf_cpu_context *cpuctx; 5408 struct perf_cpu_context *cpuctx;
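
The dynamic-type allocation above uses the old two-step idr API (this code predates idr_alloc()): idr_pre_get() preallocates memory and returns 0 on failure, then idr_get_new_above() hands out the lowest free id at or above PERF_TYPE_MAX, so dynamically registered PMUs can never collide with the fixed PERF_TYPE_* values carried in attr.type. The idiom isolated into a hypothetical helper:

	static int pmu_alloc_type(struct pmu *pmu)
	{
		int err, type;

		if (!idr_pre_get(&pmu_idr, GFP_KERNEL))	/* 0 means no memory */
			return -ENOMEM;
		err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
		if (err)
			return err;	/* e.g. -EAGAIN: preload and retry */
		pmu->type = type;	/* first free id >= PERF_TYPE_MAX */
		return 0;
	}
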
@@ -5245,6 +5446,14 @@ unlock:
5245 5446
5246 return ret; 5447 return ret;
5247 5448
5449free_dev:
5450 device_del(pmu->dev);
5451 put_device(pmu->dev);
5452
5453free_idr:
5454 if (pmu->type >= PERF_TYPE_MAX)
5455 idr_remove(&pmu_idr, pmu->type);
5456
5248free_pdc: 5457free_pdc:
5249 free_percpu(pmu->pmu_disable_count); 5458 free_percpu(pmu->pmu_disable_count);
5250 goto unlock; 5459 goto unlock;
@@ -5264,6 +5473,10 @@ void perf_pmu_unregister(struct pmu *pmu)
5264 synchronize_rcu(); 5473 synchronize_rcu();
5265 5474
5266 free_percpu(pmu->pmu_disable_count); 5475 free_percpu(pmu->pmu_disable_count);
5476 if (pmu->type >= PERF_TYPE_MAX)
5477 idr_remove(&pmu_idr, pmu->type);
5478 device_del(pmu->dev);
5479 put_device(pmu->dev);
5267 free_pmu_context(pmu); 5480 free_pmu_context(pmu);
5268} 5481}
5269 5482
@@ -5273,6 +5486,13 @@ struct pmu *perf_init_event(struct perf_event *event)
5273 int idx; 5486 int idx;
5274 5487
5275 idx = srcu_read_lock(&pmus_srcu); 5488 idx = srcu_read_lock(&pmus_srcu);
5489
5490 rcu_read_lock();
5491 pmu = idr_find(&pmu_idr, event->attr.type);
5492 rcu_read_unlock();
5493 if (pmu)
5494 goto unlock;
5495
5276 list_for_each_entry_rcu(pmu, &pmus, entry) { 5496 list_for_each_entry_rcu(pmu, &pmus, entry) {
5277 int ret = pmu->event_init(event); 5497 int ret = pmu->event_init(event);
5278 if (!ret) 5498 if (!ret)
@@ -5738,6 +5958,12 @@ SYSCALL_DEFINE5(perf_event_open,
5738 mutex_unlock(&current->perf_event_mutex); 5958 mutex_unlock(&current->perf_event_mutex);
5739 5959
5740 /* 5960 /*
5961 * Precalculate sample_data sizes
5962 */
5963 perf_event__header_size(event);
5964 perf_event__id_header_size(event);
5965
5966 /*
5741 * Drop the reference on the group_event after placing the 5967 * Drop the reference on the group_event after placing the
5742 * new event on the sibling_list. This ensures destruction 5968 * new event on the sibling_list. This ensures destruction
5743 * of the group leader will find the pointer to itself in 5969 * of the group leader will find the pointer to itself in
@@ -6090,6 +6316,12 @@ inherit_event(struct perf_event *parent_event,
6090 child_event->overflow_handler = parent_event->overflow_handler; 6316 child_event->overflow_handler = parent_event->overflow_handler;
6091 6317
6092 /* 6318 /*
6319 * Precalculate sample_data sizes
6320 */
6321 perf_event__header_size(child_event);
6322 perf_event__id_header_size(child_event);
6323
6324 /*
6093 * Link it up in the child's context: 6325 * Link it up in the child's context:
6094 */ 6326 */
6095 raw_spin_lock_irqsave(&child_ctx->lock, flags); 6327 raw_spin_lock_irqsave(&child_ctx->lock, flags);
@@ -6320,7 +6552,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
6320 mutex_unlock(&swhash->hlist_mutex); 6552 mutex_unlock(&swhash->hlist_mutex);
6321} 6553}
6322 6554
6323#ifdef CONFIG_HOTPLUG_CPU 6555#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
6324static void perf_pmu_rotate_stop(struct pmu *pmu) 6556static void perf_pmu_rotate_stop(struct pmu *pmu)
6325{ 6557{
6326 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); 6558 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -6374,6 +6606,26 @@ static void perf_event_exit_cpu(int cpu)
6374static inline void perf_event_exit_cpu(int cpu) { } 6606static inline void perf_event_exit_cpu(int cpu) { }
6375#endif 6607#endif
6376 6608
6609static int
6610perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
6611{
6612 int cpu;
6613
6614 for_each_online_cpu(cpu)
6615 perf_event_exit_cpu(cpu);
6616
6617 return NOTIFY_OK;
6618}
6619
6620/*
6621 * Run the perf reboot notifier at the very last possible moment so that
6622 * the generic watchdog code runs as long as possible.
6623 */
6624static struct notifier_block perf_reboot_notifier = {
6625 .notifier_call = perf_reboot,
6626 .priority = INT_MIN,
6627};
6628
6377static int __cpuinit 6629static int __cpuinit
6378perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) 6630perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
6379{ 6631{
@@ -6402,14 +6654,45 @@ void __init perf_event_init(void)
6402{ 6654{
6403 int ret; 6655 int ret;
6404 6656
6657 idr_init(&pmu_idr);
6658
6405 perf_event_init_all_cpus(); 6659 perf_event_init_all_cpus();
6406 init_srcu_struct(&pmus_srcu); 6660 init_srcu_struct(&pmus_srcu);
6407 perf_pmu_register(&perf_swevent); 6661 perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
6408 perf_pmu_register(&perf_cpu_clock); 6662 perf_pmu_register(&perf_cpu_clock, NULL, -1);
6409 perf_pmu_register(&perf_task_clock); 6663 perf_pmu_register(&perf_task_clock, NULL, -1);
6410 perf_tp_register(); 6664 perf_tp_register();
6411 perf_cpu_notifier(perf_cpu_notify); 6665 perf_cpu_notifier(perf_cpu_notify);
6666 register_reboot_notifier(&perf_reboot_notifier);
6412 6667
6413 ret = init_hw_breakpoint(); 6668 ret = init_hw_breakpoint();
6414 WARN(ret, "hw_breakpoint initialization failed with: %d", ret); 6669 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
6415} 6670}
6671
6672static int __init perf_event_sysfs_init(void)
6673{
6674 struct pmu *pmu;
6675 int ret;
6676
6677 mutex_lock(&pmus_lock);
6678
6679 ret = bus_register(&pmu_bus);
6680 if (ret)
6681 goto unlock;
6682
6683 list_for_each_entry(pmu, &pmus, entry) {
6684 if (!pmu->name || pmu->type < 0)
6685 continue;
6686
6687 ret = pmu_dev_alloc(pmu);
6688 WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
6689 }
6690 pmu_bus_running = 1;
6691 ret = 0;
6692
6693unlock:
6694 mutex_unlock(&pmus_lock);
6695
6696 return ret;
6697}
6698device_initcall(perf_event_sysfs_init);
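
Between pmu_dev_alloc() and perf_event_sysfs_init(), every named PMU now appears as /sys/bus/event_source/devices/<name>, and the type attribute produced by type_show() is the number userspace should place in perf_event_attr.type. A userspace sketch of resolving it (the helper name is hypothetical):

	#include <stdio.h>

	/* Return the PMU's dynamic type id, or -1 if it is not present. */
	static int pmu_type_from_sysfs(const char *pmu_name)
	{
		char path[128];
		FILE *f;
		int type = -1;

		snprintf(path, sizeof(path),
			 "/sys/bus/event_source/devices/%s/type", pmu_name);
		f = fopen(path, "r");
		if (!f)
			return -1;
		if (fscanf(f, "%d", &type) != 1)
			type = -1;
		fclose(f);
		return type;
	}
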
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 9ca4973f736d..93bd2eb2bc53 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -145,7 +145,13 @@ static int common_timer_del(struct k_itimer *timer);
145 145
146static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); 146static enum hrtimer_restart posix_timer_fn(struct hrtimer *data);
147 147
148static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); 148static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
149
150#define lock_timer(tid, flags) \
151({ struct k_itimer *__timr; \
152 __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags)); \
153 __timr; \
154})
149 155
150static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) 156static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
151{ 157{
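
The lock_timer() macro above exists for sparse rather than for the compiler: __cond_lock() tells the static checker that it_lock is held exactly when __lock_timer() returns non-NULL, which a bare function declaration cannot express. Its definition in <linux/compiler.h> is along these lines (paraphrased from that era's header):

	#ifdef __CHECKER__
	/* Tell sparse the lock is acquired iff the expression is non-zero. */
	# define __cond_lock(x, c)	((c) ? ({ __acquire(x); 1; }) : 0)
	#else
	/* Ordinary builds: the annotation is just the expression itself. */
	# define __cond_lock(x, c)	(c)
	#endif
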
@@ -619,7 +625,7 @@ out:
619 * the find to the timer lock. To avoid a deadlock, the timer id MUST 625 * the find to the timer lock. To avoid a deadlock, the timer id MUST
620 * be released without holding the timer lock. 626 * be released without holding the timer lock.
621 */ 627 */
622static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) 628static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
623{ 629{
624 struct k_itimer *timr; 630 struct k_itimer *timr;
625 /* 631 /*
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index ecf770509d0d..031d5e3a6197 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -22,6 +22,7 @@
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/suspend.h> 24#include <linux/suspend.h>
25#include <trace/events/power.h>
25 26
26#include "power.h" 27#include "power.h"
27 28
@@ -201,6 +202,7 @@ int suspend_devices_and_enter(suspend_state_t state)
201 if (!suspend_ops) 202 if (!suspend_ops)
202 return -ENOSYS; 203 return -ENOSYS;
203 204
205 trace_machine_suspend(state);
204 if (suspend_ops->begin) { 206 if (suspend_ops->begin) {
205 error = suspend_ops->begin(state); 207 error = suspend_ops->begin(state);
206 if (error) 208 if (error)
@@ -229,6 +231,7 @@ int suspend_devices_and_enter(suspend_state_t state)
229 Close: 231 Close:
230 if (suspend_ops->end) 232 if (suspend_ops->end)
231 suspend_ops->end(); 233 suspend_ops->end();
234 trace_machine_suspend(PWR_EVENT_EXIT);
232 return error; 235 return error;
233 236
234 Recover_platform: 237 Recover_platform:
diff --git a/kernel/printk.c b/kernel/printk.c
index a23315dc4498..ab3ffc5b3b64 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1074,17 +1074,17 @@ static DEFINE_PER_CPU(int, printk_pending);
1074 1074
1075void printk_tick(void) 1075void printk_tick(void)
1076{ 1076{
1077 if (__get_cpu_var(printk_pending)) { 1077 if (__this_cpu_read(printk_pending)) {
1078 __get_cpu_var(printk_pending) = 0; 1078 __this_cpu_write(printk_pending, 0);
1079 wake_up_interruptible(&log_wait); 1079 wake_up_interruptible(&log_wait);
1080 } 1080 }
1081} 1081}
1082 1082
1083int printk_needs_cpu(int cpu) 1083int printk_needs_cpu(int cpu)
1084{ 1084{
1085 if (unlikely(cpu_is_offline(cpu))) 1085 if (cpu_is_offline(cpu))
1086 printk_tick(); 1086 printk_tick();
1087 return per_cpu(printk_pending, cpu); 1087 return __this_cpu_read(printk_pending);
1088} 1088}
1089 1089
1090void wake_up_klogd(void) 1090void wake_up_klogd(void)
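
Two things change above: the flag accesses become __this_cpu_*() operations, which the arch can emit as a single segment-relative instruction instead of a per-CPU address computation plus dereference, and printk_needs_cpu() stops indexing by its cpu argument. The latter is only correct because the function is always called on the CPU it is being asked about; a sketch of the equivalence being relied on, under that assumption:

	/* All three reads return the same value when (and only when)
	 * cpu == smp_processor_id(), which is what the conversion of
	 * printk_needs_cpu() above depends on. */
	static int printk_pending_here(int cpu)
	{
		int a = per_cpu(printk_pending, cpu);	  /* any CPU's copy */
		int b = __get_cpu_var(printk_pending);	  /* this CPU: addr + deref */
		int c = __this_cpu_read(printk_pending);  /* this CPU: one access */

		return a == b && b == c;
	}
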
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index d806735342ac..034493724749 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -36,31 +36,16 @@
36#include <linux/time.h> 36#include <linux/time.h>
37#include <linux/cpu.h> 37#include <linux/cpu.h>
38 38
39/* Global control variables for rcupdate callback mechanism. */ 39/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
40struct rcu_ctrlblk { 40static struct task_struct *rcu_kthread_task;
41 struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ 41static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
42 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ 42static unsigned long have_rcu_kthread_work;
43 struct rcu_head **curtail; /* ->next pointer of last CB. */ 43static void invoke_rcu_kthread(void);
44};
45
46/* Definition for rcupdate control block. */
47static struct rcu_ctrlblk rcu_sched_ctrlblk = {
48 .donetail = &rcu_sched_ctrlblk.rcucblist,
49 .curtail = &rcu_sched_ctrlblk.rcucblist,
50};
51
52static struct rcu_ctrlblk rcu_bh_ctrlblk = {
53 .donetail = &rcu_bh_ctrlblk.rcucblist,
54 .curtail = &rcu_bh_ctrlblk.rcucblist,
55};
56
57#ifdef CONFIG_DEBUG_LOCK_ALLOC
58int rcu_scheduler_active __read_mostly;
59EXPORT_SYMBOL_GPL(rcu_scheduler_active);
60#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
61 44
62/* Forward declarations for rcutiny_plugin.h. */ 45/* Forward declarations for rcutiny_plugin.h. */
63static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); 46struct rcu_ctrlblk;
47static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
48static int rcu_kthread(void *arg);
64static void __call_rcu(struct rcu_head *head, 49static void __call_rcu(struct rcu_head *head,
65 void (*func)(struct rcu_head *rcu), 50 void (*func)(struct rcu_head *rcu),
66 struct rcu_ctrlblk *rcp); 51 struct rcu_ctrlblk *rcp);
@@ -123,7 +108,7 @@ void rcu_sched_qs(int cpu)
123{ 108{
124 if (rcu_qsctr_help(&rcu_sched_ctrlblk) + 109 if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
125 rcu_qsctr_help(&rcu_bh_ctrlblk)) 110 rcu_qsctr_help(&rcu_bh_ctrlblk))
126 raise_softirq(RCU_SOFTIRQ); 111 invoke_rcu_kthread();
127} 112}
128 113
129/* 114/*
@@ -132,7 +117,7 @@ void rcu_sched_qs(int cpu)
132void rcu_bh_qs(int cpu) 117void rcu_bh_qs(int cpu)
133{ 118{
134 if (rcu_qsctr_help(&rcu_bh_ctrlblk)) 119 if (rcu_qsctr_help(&rcu_bh_ctrlblk))
135 raise_softirq(RCU_SOFTIRQ); 120 invoke_rcu_kthread();
136} 121}
137 122
138/* 123/*
@@ -152,13 +137,14 @@ void rcu_check_callbacks(int cpu, int user)
152} 137}
153 138
154/* 139/*
155 * Helper function for rcu_process_callbacks() that operates on the 140 * Invoke the RCU callbacks on the specified rcu_ctrlblk structure
156 * specified rcu_ctrlblk structure. 141 * whose grace period has elapsed.
157 */ 142 */
158static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) 143static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
159{ 144{
160 struct rcu_head *next, *list; 145 struct rcu_head *next, *list;
161 unsigned long flags; 146 unsigned long flags;
147 RCU_TRACE(int cb_count = 0);
162 148
163 /* If no RCU callbacks ready to invoke, just return. */ 149 /* If no RCU callbacks ready to invoke, just return. */
164 if (&rcp->rcucblist == rcp->donetail) 150 if (&rcp->rcucblist == rcp->donetail)
@@ -180,19 +166,58 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
180 next = list->next; 166 next = list->next;
181 prefetch(next); 167 prefetch(next);
182 debug_rcu_head_unqueue(list); 168 debug_rcu_head_unqueue(list);
169 local_bh_disable();
183 list->func(list); 170 list->func(list);
171 local_bh_enable();
184 list = next; 172 list = next;
173 RCU_TRACE(cb_count++);
185 } 174 }
175 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
186} 176}
187 177
188/* 178/*
189 * Invoke any callbacks whose grace period has completed. 179 * This kthread invokes RCU callbacks whose grace periods have
180 * elapsed. It is awakened as needed, and takes the place of the
181 * RCU_SOFTIRQ that was used previously for this purpose.
182 * This is a kthread, but it is never stopped, at least not until
183 * the system goes down.
190 */ 184 */
191static void rcu_process_callbacks(struct softirq_action *unused) 185static int rcu_kthread(void *arg)
192{ 186{
193 __rcu_process_callbacks(&rcu_sched_ctrlblk); 187 unsigned long work;
194 __rcu_process_callbacks(&rcu_bh_ctrlblk); 188 unsigned long morework;
195 rcu_preempt_process_callbacks(); 189 unsigned long flags;
190
191 for (;;) {
192 wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0);
193 morework = rcu_boost();
194 local_irq_save(flags);
195 work = have_rcu_kthread_work;
196 have_rcu_kthread_work = morework;
197 local_irq_restore(flags);
198 if (work) {
199 rcu_process_callbacks(&rcu_sched_ctrlblk);
200 rcu_process_callbacks(&rcu_bh_ctrlblk);
201 rcu_preempt_process_callbacks();
202 }
203 schedule_timeout_interruptible(1); /* Leave CPU for others. */
204 }
205
206 return 0; /* Not reached, but needed to shut gcc up. */
207}
208
209/*
210 * Wake up rcu_kthread() to process callbacks now eligible for invocation
211 * or to boost readers.
212 */
213static void invoke_rcu_kthread(void)
214{
215 unsigned long flags;
216
217 local_irq_save(flags);
218 have_rcu_kthread_work = 1;
219 wake_up(&rcu_kthread_wq);
220 local_irq_restore(flags);
196} 221}
197 222
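
rcu_kthread() and invoke_rcu_kthread() above form a classic irq-safe work handoff: the producer, possibly running from irq context, sets a flag and wakes the kthread under local_irq_save(), while the consumer snapshots and clears the flag with irqs off so a wakeup racing with the check can never be lost. The pattern in isolation (do_pending_work() is a hypothetical placeholder):

	static unsigned long have_work;
	static DECLARE_WAIT_QUEUE_HEAD(work_wq);

	static void producer(void)	/* may be called from irq context */
	{
		unsigned long flags;

		local_irq_save(flags);
		have_work = 1;
		wake_up(&work_wq);
		local_irq_restore(flags);
	}

	static void consumer_once(void)	/* one iteration of the kthread loop */
	{
		unsigned long flags, work;

		wait_event(work_wq, have_work != 0);
		local_irq_save(flags);
		work = have_work;		/* snapshot ... */
		have_work = 0;			/* ... and clear, atomically vs irqs */
		local_irq_restore(flags);
		if (work)
			do_pending_work();	/* hypothetical */
	}
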
198/* 223/*
@@ -230,6 +255,7 @@ static void __call_rcu(struct rcu_head *head,
230 local_irq_save(flags); 255 local_irq_save(flags);
231 *rcp->curtail = head; 256 *rcp->curtail = head;
232 rcp->curtail = &head->next; 257 rcp->curtail = &head->next;
258 RCU_TRACE(rcp->qlen++);
233 local_irq_restore(flags); 259 local_irq_restore(flags);
234} 260}
235 261
@@ -282,7 +308,16 @@ void rcu_barrier_sched(void)
282} 308}
283EXPORT_SYMBOL_GPL(rcu_barrier_sched); 309EXPORT_SYMBOL_GPL(rcu_barrier_sched);
284 310
285void __init rcu_init(void) 311/*
312 * Spawn the kthread that invokes RCU callbacks.
313 */
314static int __init rcu_spawn_kthreads(void)
286{ 315{
287 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 316 struct sched_param sp;
317
318 rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
319 sp.sched_priority = RCU_BOOST_PRIO;
320 sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
321 return 0;
288} 322}
323early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 6ceca4f745ff..015abaea962a 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -22,6 +22,40 @@
22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */ 23 */
24 24
25#include <linux/kthread.h>
26#include <linux/debugfs.h>
27#include <linux/seq_file.h>
28
29#ifdef CONFIG_RCU_TRACE
30#define RCU_TRACE(stmt) stmt
31#else /* #ifdef CONFIG_RCU_TRACE */
32#define RCU_TRACE(stmt)
33#endif /* #else #ifdef CONFIG_RCU_TRACE */
34
35/* Global control variables for rcupdate callback mechanism. */
36struct rcu_ctrlblk {
37 struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
38 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
39 struct rcu_head **curtail; /* ->next pointer of last CB. */
40 RCU_TRACE(long qlen); /* Number of pending CBs. */
41};
42
43/* Definition for rcupdate control block. */
44static struct rcu_ctrlblk rcu_sched_ctrlblk = {
45 .donetail = &rcu_sched_ctrlblk.rcucblist,
46 .curtail = &rcu_sched_ctrlblk.rcucblist,
47};
48
49static struct rcu_ctrlblk rcu_bh_ctrlblk = {
50 .donetail = &rcu_bh_ctrlblk.rcucblist,
51 .curtail = &rcu_bh_ctrlblk.rcucblist,
52};
53
54#ifdef CONFIG_DEBUG_LOCK_ALLOC
55int rcu_scheduler_active __read_mostly;
56EXPORT_SYMBOL_GPL(rcu_scheduler_active);
57#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
58
25#ifdef CONFIG_TINY_PREEMPT_RCU 59#ifdef CONFIG_TINY_PREEMPT_RCU
26 60
27#include <linux/delay.h> 61#include <linux/delay.h>
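
RCU_TRACE() pastes its argument verbatim when CONFIG_RCU_TRACE=y and expands to nothing otherwise, which is why the single macro can guard both a struct member (the qlen field above) and ordinary statements (the qlen updates elsewhere in this patch). In miniature, with hypothetical names:

	#ifdef CONFIG_RCU_TRACE
	#define RCU_TRACE(stmt) stmt
	#else
	#define RCU_TRACE(stmt)
	#endif

	struct cb_list {
		struct rcu_head *head;
		RCU_TRACE(long qlen);	/* member exists only when tracing */
	};

	static void cb_enqueue(struct cb_list *l, struct rcu_head *h)
	{
		h->next = l->head;
		l->head = h;
		RCU_TRACE(l->qlen++);	/* statement compiled out otherwise */
	}
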
@@ -46,17 +80,45 @@ struct rcu_preempt_ctrlblk {
46 struct list_head *gp_tasks; 80 struct list_head *gp_tasks;
47 /* Pointer to the first task blocking the */ 81 /* Pointer to the first task blocking the */
48 /* current grace period, or NULL if there */ 82 /* current grace period, or NULL if there */
49 /* is not such task. */ 83 /* is no such task. */
50 struct list_head *exp_tasks; 84 struct list_head *exp_tasks;
51 /* Pointer to first task blocking the */ 85 /* Pointer to first task blocking the */
52 /* current expedited grace period, or NULL */ 86 /* current expedited grace period, or NULL */
53 /* if there is no such task. If there */ 87 /* if there is no such task. If there */
54 /* is no current expedited grace period, */ 88 /* is no current expedited grace period, */
55 /* then there cannot be any such task. */ 89 /* then there cannot be any such task. */
90#ifdef CONFIG_RCU_BOOST
91 struct list_head *boost_tasks;
92 /* Pointer to first task that needs to be */
93 /* priority-boosted, or NULL if no priority */
94 /* boosting is needed. If there is no */
95 /* current or expedited grace period, there */
96 /* can be no such task. */
97#endif /* #ifdef CONFIG_RCU_BOOST */
56 u8 gpnum; /* Current grace period. */ 98 u8 gpnum; /* Current grace period. */
57 u8 gpcpu; /* Last grace period blocked by the CPU. */ 99 u8 gpcpu; /* Last grace period blocked by the CPU. */
58 u8 completed; /* Last grace period completed. */ 100 u8 completed; /* Last grace period completed. */
59 /* If all three are equal, RCU is idle. */ 101 /* If all three are equal, RCU is idle. */
102#ifdef CONFIG_RCU_BOOST
103 s8 boosted_this_gp; /* Has boosting already happened? */
104 unsigned long boost_time; /* When to start boosting (jiffies) */
105#endif /* #ifdef CONFIG_RCU_BOOST */
106#ifdef CONFIG_RCU_TRACE
107 unsigned long n_grace_periods;
108#ifdef CONFIG_RCU_BOOST
109 unsigned long n_tasks_boosted;
110 unsigned long n_exp_boosts;
111 unsigned long n_normal_boosts;
112 unsigned long n_normal_balk_blkd_tasks;
113 unsigned long n_normal_balk_gp_tasks;
114 unsigned long n_normal_balk_boost_tasks;
115 unsigned long n_normal_balk_boosted;
116 unsigned long n_normal_balk_notyet;
117 unsigned long n_normal_balk_nos;
118 unsigned long n_exp_balk_blkd_tasks;
119 unsigned long n_exp_balk_nos;
120#endif /* #ifdef CONFIG_RCU_BOOST */
121#endif /* #ifdef CONFIG_RCU_TRACE */
60}; 122};
61 123
62static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { 124static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -122,6 +184,210 @@ static int rcu_preempt_gp_in_progress(void)
122} 184}
123 185
124/* 186/*
187 * Advance a ->blkd_tasks-list pointer to the next entry, returning
188 * NULL instead if the pointer is at the end of the list.
189 */
190static struct list_head *rcu_next_node_entry(struct task_struct *t)
191{
192 struct list_head *np;
193
194 np = t->rcu_node_entry.next;
195 if (np == &rcu_preempt_ctrlblk.blkd_tasks)
196 np = NULL;
197 return np;
198}
199
200#ifdef CONFIG_RCU_TRACE
201
202#ifdef CONFIG_RCU_BOOST
203static void rcu_initiate_boost_trace(void);
204static void rcu_initiate_exp_boost_trace(void);
205#endif /* #ifdef CONFIG_RCU_BOOST */
206
207/*
208 * Dump additional statistics for TINY_PREEMPT_RCU.
209 */
210static void show_tiny_preempt_stats(struct seq_file *m)
211{
212 seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
213 rcu_preempt_ctrlblk.rcb.qlen,
214 rcu_preempt_ctrlblk.n_grace_periods,
215 rcu_preempt_ctrlblk.gpnum,
216 rcu_preempt_ctrlblk.gpcpu,
217 rcu_preempt_ctrlblk.completed,
218 "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
219 "N."[!rcu_preempt_ctrlblk.gp_tasks],
220 "E."[!rcu_preempt_ctrlblk.exp_tasks]);
221#ifdef CONFIG_RCU_BOOST
222 seq_printf(m, " ttb=%c btg=",
223 "B."[!rcu_preempt_ctrlblk.boost_tasks]);
224 switch (rcu_preempt_ctrlblk.boosted_this_gp) {
225 case -1:
226 seq_puts(m, "exp");
227 break;
228 case 0:
229 seq_puts(m, "no");
230 break;
231 case 1:
232 seq_puts(m, "begun");
233 break;
234 case 2:
235 seq_puts(m, "done");
236 break;
237 default:
238 seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
239 }
240 seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
241 rcu_preempt_ctrlblk.n_tasks_boosted,
242 rcu_preempt_ctrlblk.n_exp_boosts,
243 rcu_preempt_ctrlblk.n_normal_boosts,
244 (int)(jiffies & 0xffff),
245 (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
246 seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n",
247 "normal balk",
248 rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks,
249 rcu_preempt_ctrlblk.n_normal_balk_gp_tasks,
250 rcu_preempt_ctrlblk.n_normal_balk_boost_tasks,
251 rcu_preempt_ctrlblk.n_normal_balk_boosted,
252 rcu_preempt_ctrlblk.n_normal_balk_notyet,
253 rcu_preempt_ctrlblk.n_normal_balk_nos);
254 seq_printf(m, " exp balk: bt=%lu nos=%lu\n",
255 rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
256 rcu_preempt_ctrlblk.n_exp_balk_nos);
257#endif /* #ifdef CONFIG_RCU_BOOST */
258}
259
260#endif /* #ifdef CONFIG_RCU_TRACE */
261
262#ifdef CONFIG_RCU_BOOST
263
264#include "rtmutex_common.h"
265
266/*
267 * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
268 * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
269 */
270static int rcu_boost(void)
271{
272 unsigned long flags;
273 struct rt_mutex mtx;
274 struct list_head *np;
275 struct task_struct *t;
276
277 if (rcu_preempt_ctrlblk.boost_tasks == NULL)
278 return 0; /* Nothing to boost. */
279 raw_local_irq_save(flags);
280 rcu_preempt_ctrlblk.boosted_this_gp++;
281 t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
282 rcu_node_entry);
283 np = rcu_next_node_entry(t);
284 rt_mutex_init_proxy_locked(&mtx, t);
285 t->rcu_boost_mutex = &mtx;
286 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
287 raw_local_irq_restore(flags);
288 rt_mutex_lock(&mtx);
289 RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
290 rcu_preempt_ctrlblk.boosted_this_gp++;
291 rt_mutex_unlock(&mtx);
292 return rcu_preempt_ctrlblk.boost_tasks != NULL;
293}
294
295/*
296 * Check to see if it is now time to start boosting RCU readers blocking
297 * the current grace period, and, if so, tell the rcu_kthread_task to
298 * start boosting them. If there is an expedited boost in progress,
299 * we wait for it to complete.
300 *
301 * If there are no blocked readers blocking the current grace period,
302 * return 0 to let the caller know, otherwise return 1. Note that this
303 * return value is independent of whether or not boosting was done.
304 */
305static int rcu_initiate_boost(void)
306{
307 if (!rcu_preempt_blocked_readers_cgp()) {
308 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++);
309 return 0;
310 }
311 if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
312 rcu_preempt_ctrlblk.boost_tasks == NULL &&
313 rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
314 ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
315 rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
316 invoke_rcu_kthread();
317 RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
318 } else
319 RCU_TRACE(rcu_initiate_boost_trace());
320 return 1;
321}
322
323/*
324 * Initiate boosting for an expedited grace period.
325 */
326static void rcu_initiate_expedited_boost(void)
327{
328 unsigned long flags;
329
330 raw_local_irq_save(flags);
331 if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
332 rcu_preempt_ctrlblk.boost_tasks =
333 rcu_preempt_ctrlblk.blkd_tasks.next;
334 rcu_preempt_ctrlblk.boosted_this_gp = -1;
335 invoke_rcu_kthread();
336 RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
337 } else
338 RCU_TRACE(rcu_initiate_exp_boost_trace());
339 raw_local_irq_restore(flags);
340}
341
342#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
343
344/*
345 * Do priority-boost accounting for the start of a new grace period.
346 */
347static void rcu_preempt_boost_start_gp(void)
348{
349 rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
350 if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
351 rcu_preempt_ctrlblk.boosted_this_gp = 0;
352}
353
354#else /* #ifdef CONFIG_RCU_BOOST */
355
356/*
357 * If there is no RCU priority boosting, we don't boost.
358 */
359static int rcu_boost(void)
360{
361 return 0;
362}
363
364/*
365 * If there is no RCU priority boosting, we don't initiate boosting,
366 * but we do indicate whether there are blocked readers blocking the
367 * current grace period.
368 */
369static int rcu_initiate_boost(void)
370{
371 return rcu_preempt_blocked_readers_cgp();
372}
373
374/*
375 * If there is no RCU priority boosting, we don't initiate expedited boosting.
376 */
377static void rcu_initiate_expedited_boost(void)
378{
379}
380
381/*
382 * If there is no RCU priority boosting, nothing to do at grace-period start.
383 */
384static void rcu_preempt_boost_start_gp(void)
385{
386}
387
388#endif /* else #ifdef CONFIG_RCU_BOOST */
389
390/*
125 * Record a preemptible-RCU quiescent state for the specified CPU. Note 391 * Record a preemptible-RCU quiescent state for the specified CPU. Note
126 * that this just means that the task currently running on the CPU is 392 * that this just means that the task currently running on the CPU is
127 * in a quiescent state. There might be any number of tasks blocked 393 * in a quiescent state. There might be any number of tasks blocked
@@ -148,11 +414,14 @@ static void rcu_preempt_cpu_qs(void)
148 rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; 414 rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
149 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 415 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
150 416
417 /* If there is no GP then there is nothing more to do. */
418 if (!rcu_preempt_gp_in_progress())
419 return;
151 /* 420 /*
152 * If there is no GP, or if blocked readers are still blocking GP, 421 * Check up on boosting. If there are readers blocking the
153 * then there is nothing more to do. 422 * current grace period, leave.
154 */ 423 */
155 if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) 424 if (rcu_initiate_boost())
156 return; 425 return;
157 426
158 /* Advance callbacks. */ 427 /* Advance callbacks. */
@@ -164,9 +433,9 @@ static void rcu_preempt_cpu_qs(void)
164 if (!rcu_preempt_blocked_readers_any()) 433 if (!rcu_preempt_blocked_readers_any())
165 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; 434 rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
166 435
167 /* If there are done callbacks, make RCU_SOFTIRQ process them. */ 436 /* If there are done callbacks, cause them to be invoked. */
168 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) 437 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
169 raise_softirq(RCU_SOFTIRQ); 438 invoke_rcu_kthread();
170} 439}
171 440
172/* 441/*
@@ -178,12 +447,16 @@ static void rcu_preempt_start_gp(void)
178 447
179 /* Official start of GP. */ 448 /* Official start of GP. */
180 rcu_preempt_ctrlblk.gpnum++; 449 rcu_preempt_ctrlblk.gpnum++;
450 RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
181 451
182 /* Any blocked RCU readers block new GP. */ 452 /* Any blocked RCU readers block new GP. */
183 if (rcu_preempt_blocked_readers_any()) 453 if (rcu_preempt_blocked_readers_any())
184 rcu_preempt_ctrlblk.gp_tasks = 454 rcu_preempt_ctrlblk.gp_tasks =
185 rcu_preempt_ctrlblk.blkd_tasks.next; 455 rcu_preempt_ctrlblk.blkd_tasks.next;
186 456
457 /* Set up for RCU priority boosting. */
458 rcu_preempt_boost_start_gp();
459
187 /* If there is no running reader, CPU is done with GP. */ 460 /* If there is no running reader, CPU is done with GP. */
188 if (!rcu_preempt_running_reader()) 461 if (!rcu_preempt_running_reader())
189 rcu_preempt_cpu_qs(); 462 rcu_preempt_cpu_qs();
@@ -304,14 +577,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
304 */ 577 */
305 empty = !rcu_preempt_blocked_readers_cgp(); 578 empty = !rcu_preempt_blocked_readers_cgp();
306 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; 579 empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
307 np = t->rcu_node_entry.next; 580 np = rcu_next_node_entry(t);
308 if (np == &rcu_preempt_ctrlblk.blkd_tasks)
309 np = NULL;
310 list_del(&t->rcu_node_entry); 581 list_del(&t->rcu_node_entry);
311 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) 582 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
312 rcu_preempt_ctrlblk.gp_tasks = np; 583 rcu_preempt_ctrlblk.gp_tasks = np;
313 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) 584 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
314 rcu_preempt_ctrlblk.exp_tasks = np; 585 rcu_preempt_ctrlblk.exp_tasks = np;
586#ifdef CONFIG_RCU_BOOST
587 if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
588 rcu_preempt_ctrlblk.boost_tasks = np;
589#endif /* #ifdef CONFIG_RCU_BOOST */
315 INIT_LIST_HEAD(&t->rcu_node_entry); 590 INIT_LIST_HEAD(&t->rcu_node_entry);
316 591
317 /* 592 /*
@@ -331,6 +606,14 @@ static void rcu_read_unlock_special(struct task_struct *t)
331 if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) 606 if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
332 rcu_report_exp_done(); 607 rcu_report_exp_done();
333 } 608 }
609#ifdef CONFIG_RCU_BOOST
610 /* Unboost self if was boosted. */
611 if (special & RCU_READ_UNLOCK_BOOSTED) {
612 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
613 rt_mutex_unlock(t->rcu_boost_mutex);
614 t->rcu_boost_mutex = NULL;
615 }
616#endif /* #ifdef CONFIG_RCU_BOOST */
334 local_irq_restore(flags); 617 local_irq_restore(flags);
335} 618}
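
rcu_boost() earlier in this file and the unboost code just above form a two-actor handshake built on rt_mutex priority inheritance: the booster proxy-locks an on-stack mutex on behalf of the preempted reader and then blocks on it at RT priority, so PI lifts the reader; the reader releases the mutex in rcu_read_unlock_special() and thereby de-boosts itself. The skeleton, with the two sides split into hypothetical helpers:

	static void booster_side(struct task_struct *reader)	/* cf. rcu_boost() */
	{
		struct rt_mutex mtx;	/* on-stack is safe: we block below */

		rt_mutex_init_proxy_locked(&mtx, reader);	/* reader owns mtx */
		reader->rcu_boost_mutex = &mtx;
		reader->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
		rt_mutex_lock(&mtx);	/* blocks; PI boosts reader to our prio */
		rt_mutex_unlock(&mtx);	/* reader has released it by now */
	}

	static void reader_side(struct task_struct *t)	/* cf. unboost above */
	{
		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
		rt_mutex_unlock(t->rcu_boost_mutex);	/* de-boost self */
		t->rcu_boost_mutex = NULL;
	}
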
336 619
@@ -374,7 +657,7 @@ static void rcu_preempt_check_callbacks(void)
374 rcu_preempt_cpu_qs(); 657 rcu_preempt_cpu_qs();
375 if (&rcu_preempt_ctrlblk.rcb.rcucblist != 658 if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
376 rcu_preempt_ctrlblk.rcb.donetail) 659 rcu_preempt_ctrlblk.rcb.donetail)
377 raise_softirq(RCU_SOFTIRQ); 660 invoke_rcu_kthread();
378 if (rcu_preempt_gp_in_progress() && 661 if (rcu_preempt_gp_in_progress() &&
379 rcu_cpu_blocking_cur_gp() && 662 rcu_cpu_blocking_cur_gp() &&
380 rcu_preempt_running_reader()) 663 rcu_preempt_running_reader())
@@ -383,7 +666,7 @@ static void rcu_preempt_check_callbacks(void)
383 666
384/* 667/*
385 * TINY_PREEMPT_RCU has an extra callback-list tail pointer to 668 * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
386 * update, so this is invoked from __rcu_process_callbacks() to 669 * update, so this is invoked from rcu_process_callbacks() to
387 * handle that case. Of course, it is invoked for all flavors of 670 * handle that case. Of course, it is invoked for all flavors of
388 * RCU, but RCU callbacks can appear only on one of the lists, and 671 * RCU, but RCU callbacks can appear only on one of the lists, and
389 * neither ->nexttail nor ->donetail can possibly be NULL, so there 672 * neither ->nexttail nor ->donetail can possibly be NULL, so there
@@ -400,7 +683,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
400 */ 683 */
401static void rcu_preempt_process_callbacks(void) 684static void rcu_preempt_process_callbacks(void)
402{ 685{
403 __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); 686 rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
404} 687}
405 688
406/* 689/*
@@ -417,6 +700,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
417 local_irq_save(flags); 700 local_irq_save(flags);
418 *rcu_preempt_ctrlblk.nexttail = head; 701 *rcu_preempt_ctrlblk.nexttail = head;
419 rcu_preempt_ctrlblk.nexttail = &head->next; 702 rcu_preempt_ctrlblk.nexttail = &head->next;
703 RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
420 rcu_preempt_start_gp(); /* checks to see if GP needed. */ 704 rcu_preempt_start_gp(); /* checks to see if GP needed. */
421 local_irq_restore(flags); 705 local_irq_restore(flags);
422} 706}
@@ -532,6 +816,7 @@ void synchronize_rcu_expedited(void)
532 816
533 /* Wait for tail of ->blkd_tasks list to drain. */ 817 /* Wait for tail of ->blkd_tasks list to drain. */
534 if (rcu_preempted_readers_exp()) 818 if (rcu_preempted_readers_exp())
819 rcu_initiate_expedited_boost();
535 wait_event(sync_rcu_preempt_exp_wq, 820 wait_event(sync_rcu_preempt_exp_wq,
536 !rcu_preempted_readers_exp()); 821 !rcu_preempted_readers_exp());
537 822
@@ -572,6 +857,27 @@ void exit_rcu(void)
572 857
573#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ 858#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
574 859
860#ifdef CONFIG_RCU_TRACE
861
862/*
863 * Because preemptible RCU does not exist, it is not necessary to
864 * dump out its statistics.
865 */
866static void show_tiny_preempt_stats(struct seq_file *m)
867{
868}
869
870#endif /* #ifdef CONFIG_RCU_TRACE */
871
872/*
873 * Because preemptible RCU does not exist, it is never necessary to
874 * boost preempted RCU readers.
875 */
876static int rcu_boost(void)
877{
878 return 0;
879}
880
575/* 881/*
576 * Because preemptible RCU does not exist, it never has any callbacks 882 * Because preemptible RCU does not exist, it never has any callbacks
577 * to check. 883 * to check.
@@ -599,17 +905,116 @@ static void rcu_preempt_process_callbacks(void)
599#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ 905#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
600 906
601#ifdef CONFIG_DEBUG_LOCK_ALLOC 907#ifdef CONFIG_DEBUG_LOCK_ALLOC
602
603#include <linux/kernel_stat.h> 908#include <linux/kernel_stat.h>
604 909
605/* 910/*
606 * During boot, we forgive RCU lockdep issues. After this function is 911 * During boot, we forgive RCU lockdep issues. After this function is
607 * invoked, we start taking RCU lockdep issues seriously. 912 * invoked, we start taking RCU lockdep issues seriously.
608 */ 913 */
609void rcu_scheduler_starting(void) 914void __init rcu_scheduler_starting(void)
610{ 915{
611 WARN_ON(nr_context_switches() > 0); 916 WARN_ON(nr_context_switches() > 0);
612 rcu_scheduler_active = 1; 917 rcu_scheduler_active = 1;
613} 918}
614 919
615#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 920#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
921
922#ifdef CONFIG_RCU_BOOST
923#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
924#else /* #ifdef CONFIG_RCU_BOOST */
925#define RCU_BOOST_PRIO 1
926#endif /* #else #ifdef CONFIG_RCU_BOOST */
927
928#ifdef CONFIG_RCU_TRACE
929
930#ifdef CONFIG_RCU_BOOST
931
932static void rcu_initiate_boost_trace(void)
933{
934 if (rcu_preempt_ctrlblk.gp_tasks == NULL)
935 rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++;
936 else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
937 rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++;
938 else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
939 rcu_preempt_ctrlblk.n_normal_balk_boosted++;
940 else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
941 rcu_preempt_ctrlblk.n_normal_balk_notyet++;
942 else
943 rcu_preempt_ctrlblk.n_normal_balk_nos++;
944}
945
946static void rcu_initiate_exp_boost_trace(void)
947{
948 if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
949 rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
950 else
951 rcu_preempt_ctrlblk.n_exp_balk_nos++;
952}
953
954#endif /* #ifdef CONFIG_RCU_BOOST */
955
956static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
957{
958 unsigned long flags;
959
960 raw_local_irq_save(flags);
961 rcp->qlen -= n;
962 raw_local_irq_restore(flags);
963}
964
965/*
966 * Dump statistics for TINY_RCU, such as they are.
967 */
968static int show_tiny_stats(struct seq_file *m, void *unused)
969{
970 show_tiny_preempt_stats(m);
971 seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
972 seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
973 return 0;
974}
975
976static int show_tiny_stats_open(struct inode *inode, struct file *file)
977{
978 return single_open(file, show_tiny_stats, NULL);
979}
980
981static const struct file_operations show_tiny_stats_fops = {
982 .owner = THIS_MODULE,
983 .open = show_tiny_stats_open,
984 .read = seq_read,
985 .llseek = seq_lseek,
986 .release = single_release,
987};
988
989static struct dentry *rcudir;
990
991static int __init rcutiny_trace_init(void)
992{
993 struct dentry *retval;
994
995 rcudir = debugfs_create_dir("rcu", NULL);
996 if (!rcudir)
997 goto free_out;
998 retval = debugfs_create_file("rcudata", 0444, rcudir,
999 NULL, &show_tiny_stats_fops);
1000 if (!retval)
1001 goto free_out;
1002 return 0;
1003free_out:
1004 debugfs_remove_recursive(rcudir);
1005 return 1;
1006}
1007
1008static void __exit rcutiny_trace_cleanup(void)
1009{
1010 debugfs_remove_recursive(rcudir);
1011}
1012
1013module_init(rcutiny_trace_init);
1014module_exit(rcutiny_trace_cleanup);
1015
1016MODULE_AUTHOR("Paul E. McKenney");
1017MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
1018MODULE_LICENSE("GPL");
1019
1020#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9d8e8fb2515f..89613f97ff26 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -47,6 +47,7 @@
47#include <linux/srcu.h> 47#include <linux/srcu.h>
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include <asm/byteorder.h> 49#include <asm/byteorder.h>
50#include <linux/sched.h>
50 51
51MODULE_LICENSE("GPL"); 52MODULE_LICENSE("GPL");
52MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " 53MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
@@ -64,6 +65,9 @@ static int irqreader = 1; /* RCU readers from irq (timers). */
64static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ 65static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */
65static int fqs_holdoff = 0; /* Hold time within burst (us). */ 66static int fqs_holdoff = 0; /* Hold time within burst (us). */
66static int fqs_stutter = 3; /* Wait time between bursts (s). */ 67static int fqs_stutter = 3; /* Wait time between bursts (s). */
68static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
69static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
70static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
67static char *torture_type = "rcu"; /* What RCU implementation to torture. */ 71static char *torture_type = "rcu"; /* What RCU implementation to torture. */
68 72
69module_param(nreaders, int, 0444); 73module_param(nreaders, int, 0444);
@@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444);
88MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); 92MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
89module_param(fqs_stutter, int, 0444); 93module_param(fqs_stutter, int, 0444);
90MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); 94MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
95module_param(test_boost, int, 0444);
96MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
97module_param(test_boost_interval, int, 0444);
98MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds.");
99module_param(test_boost_duration, int, 0444);
100MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds.");
91module_param(torture_type, charp, 0444); 101module_param(torture_type, charp, 0444);
92MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); 102MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
93 103
@@ -109,6 +119,7 @@ static struct task_struct *stats_task;
109static struct task_struct *shuffler_task; 119static struct task_struct *shuffler_task;
110static struct task_struct *stutter_task; 120static struct task_struct *stutter_task;
111static struct task_struct *fqs_task; 121static struct task_struct *fqs_task;
122static struct task_struct *boost_tasks[NR_CPUS];
112 123
113#define RCU_TORTURE_PIPE_LEN 10 124#define RCU_TORTURE_PIPE_LEN 10
114 125
@@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail;
134static atomic_t n_rcu_torture_free; 145static atomic_t n_rcu_torture_free;
135static atomic_t n_rcu_torture_mberror; 146static atomic_t n_rcu_torture_mberror;
136static atomic_t n_rcu_torture_error; 147static atomic_t n_rcu_torture_error;
148static long n_rcu_torture_boost_ktrerror;
149static long n_rcu_torture_boost_rterror;
150static long n_rcu_torture_boost_allocerror;
151static long n_rcu_torture_boost_afferror;
152static long n_rcu_torture_boost_failure;
153static long n_rcu_torture_boosts;
137static long n_rcu_torture_timers; 154static long n_rcu_torture_timers;
138static struct list_head rcu_torture_removed; 155static struct list_head rcu_torture_removed;
139static cpumask_var_t shuffle_tmp_mask; 156static cpumask_var_t shuffle_tmp_mask;
@@ -147,6 +164,16 @@ static int stutter_pause_test;
147#endif 164#endif
148int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 165int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
149 166
167#ifdef CONFIG_RCU_BOOST
168#define rcu_can_boost() 1
169#else /* #ifdef CONFIG_RCU_BOOST */
170#define rcu_can_boost() 0
171#endif /* #else #ifdef CONFIG_RCU_BOOST */
172
173static unsigned long boost_starttime; /* jiffies of next boost test start. */
174DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
175 /* and boost task create/destroy. */
176
150/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ 177/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
151 178
152#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ 179#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
@@ -277,6 +304,7 @@ struct rcu_torture_ops {
277 void (*fqs)(void); 304 void (*fqs)(void);
278 int (*stats)(char *page); 305 int (*stats)(char *page);
279 int irq_capable; 306 int irq_capable;
307 int can_boost;
280 char *name; 308 char *name;
281}; 309};
282 310
@@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = {
366 .fqs = rcu_force_quiescent_state, 394 .fqs = rcu_force_quiescent_state,
367 .stats = NULL, 395 .stats = NULL,
368 .irq_capable = 1, 396 .irq_capable = 1,
397 .can_boost = rcu_can_boost(),
369 .name = "rcu" 398 .name = "rcu"
370}; 399};
371 400
@@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
408 .fqs = rcu_force_quiescent_state, 437 .fqs = rcu_force_quiescent_state,
409 .stats = NULL, 438 .stats = NULL,
410 .irq_capable = 1, 439 .irq_capable = 1,
440 .can_boost = rcu_can_boost(),
411 .name = "rcu_sync" 441 .name = "rcu_sync"
412}; 442};
413 443
@@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
424 .fqs = rcu_force_quiescent_state, 454 .fqs = rcu_force_quiescent_state,
425 .stats = NULL, 455 .stats = NULL,
426 .irq_capable = 1, 456 .irq_capable = 1,
457 .can_boost = rcu_can_boost(),
427 .name = "rcu_expedited" 458 .name = "rcu_expedited"
428}; 459};
429 460
@@ -684,6 +715,110 @@ static struct rcu_torture_ops sched_expedited_ops = {
684}; 715};
685 716
686/* 717/*
718 * RCU torture priority-boost testing. Runs one real-time thread per
719 * CPU for moderate bursts, repeatedly registering RCU callbacks and
720 * spinning waiting for them to be invoked. If a given callback takes
721 * too long to be invoked, we assume that priority inversion has occurred.
722 */
723
724struct rcu_boost_inflight {
725 struct rcu_head rcu;
726 int inflight;
727};
728
729static void rcu_torture_boost_cb(struct rcu_head *head)
730{
731 struct rcu_boost_inflight *rbip =
732 container_of(head, struct rcu_boost_inflight, rcu);
733
734 smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */
735 rbip->inflight = 0;
736}
737
738static int rcu_torture_boost(void *arg)
739{
740 unsigned long call_rcu_time;
741 unsigned long endtime;
742 unsigned long oldstarttime;
743 struct rcu_boost_inflight rbi = { .inflight = 0 };
744 struct sched_param sp;
745
746 VERBOSE_PRINTK_STRING("rcu_torture_boost started");
747
748 /* Set real-time priority. */
749 sp.sched_priority = 1;
750 if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) {
751 VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!");
752 n_rcu_torture_boost_rterror++;
753 }
754
755 /* Each pass through the following loop does one boost-test cycle. */
756 do {
757 /* Wait for the next test interval. */
758 oldstarttime = boost_starttime;
759 while (jiffies - oldstarttime > ULONG_MAX / 2) {
760 schedule_timeout_uninterruptible(1);
761 rcu_stutter_wait("rcu_torture_boost");
762 if (kthread_should_stop() ||
763 fullstop != FULLSTOP_DONTSTOP)
764 goto checkwait;
765 }
766
767 /* Do one boost-test interval. */
768 endtime = oldstarttime + test_boost_duration * HZ;
769 call_rcu_time = jiffies;
770 while (jiffies - endtime > ULONG_MAX / 2) {
771 /* If we don't have a callback in flight, post one. */
772 if (!rbi.inflight) {
773 smp_mb(); /* RCU core before ->inflight = 1. */
774 rbi.inflight = 1;
775 call_rcu(&rbi.rcu, rcu_torture_boost_cb);
776 if (jiffies - call_rcu_time >
777 test_boost_duration * HZ - HZ / 2) {
778 VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed");
779 n_rcu_torture_boost_failure++;
780 }
781 call_rcu_time = jiffies;
782 }
783 cond_resched();
784 rcu_stutter_wait("rcu_torture_boost");
785 if (kthread_should_stop() ||
786 fullstop != FULLSTOP_DONTSTOP)
787 goto checkwait;
788 }
789
790 /*
791 * Set the start time of the next test interval.
792 * Yes, this is vulnerable to long delays, but such
793 * delays simply cause a false negative for the next
794 * interval. Besides, we are running at RT priority,
795 * so delays should be relatively rare.
796 */
797 while (oldstarttime == boost_starttime) {
798 if (mutex_trylock(&boost_mutex)) {
799 boost_starttime = jiffies +
800 test_boost_interval * HZ;
801 n_rcu_torture_boosts++;
802 mutex_unlock(&boost_mutex);
803 break;
804 }
805 schedule_timeout_uninterruptible(1);
806 }
807
808 /* Go do the stutter. */
809checkwait: rcu_stutter_wait("rcu_torture_boost");
810 } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
811
812 /* Clean up and exit. */
813 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
814 rcutorture_shutdown_absorb("rcu_torture_boost");
815 while (!kthread_should_stop() || rbi.inflight)
816 schedule_timeout_uninterruptible(1);
817 smp_mb(); /* order accesses to ->inflight before stack-frame death. */
818 return 0;
819}
820
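
Both waiting loops in rcu_torture_boost() rely on an unsigned wraparound idiom: for unsigned longs a and b, a - b > ULONG_MAX / 2 holds exactly when a still precedes b in modular jiffies time, so each loop reads as "spin until jiffies reaches the deadline". The same test as a named helper (hypothetical; the kernel's time_before() expresses it with a signed cast instead):

	/* True iff a is before b in wrap-safe modular time; equivalent to
	 * time_before(a, b) for differences under half the counter range. */
	static inline int jiffies_is_before(unsigned long a, unsigned long b)
	{
		return a - b > ULONG_MAX / 2;
	}

	/* e.g.: while (jiffies_is_before(jiffies, endtime)) { ... } */
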
821/*
687 * RCU torture force-quiescent-state kthread. Repeatedly induces 822 * RCU torture force-quiescent-state kthread. Repeatedly induces
688 * bursts of calls to force_quiescent_state(), increasing the probability 823 * bursts of calls to force_quiescent_state(), increasing the probability
689 * of occurrence of some important types of race conditions. 824 * of occurrence of some important types of race conditions.
@@ -933,7 +1068,8 @@ rcu_torture_printk(char *page)
933 1068 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
934 1069 cnt += sprintf(&page[cnt],
935 1070 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
936 "rtmbe: %d nt: %ld",
1071 "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld "
1072 "rtbf: %ld rtb: %ld nt: %ld",
937 1073 rcu_torture_current,
938 1074 rcu_torture_current_version,
939 1075 list_empty(&rcu_torture_freelist),
@@ -941,8 +1077,19 @@ rcu_torture_printk(char *page)
941 1077 atomic_read(&n_rcu_torture_alloc_fail),
942 1078 atomic_read(&n_rcu_torture_free),
943 1079 atomic_read(&n_rcu_torture_mberror),
1080 n_rcu_torture_boost_ktrerror,
1081 n_rcu_torture_boost_rterror,
1082 n_rcu_torture_boost_allocerror,
1083 n_rcu_torture_boost_afferror,
1084 n_rcu_torture_boost_failure,
1085 n_rcu_torture_boosts,
944 1086 n_rcu_torture_timers);
945 if (atomic_read(&n_rcu_torture_mberror) != 0)
1087 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1088 n_rcu_torture_boost_ktrerror != 0 ||
1089 n_rcu_torture_boost_rterror != 0 ||
1090 n_rcu_torture_boost_allocerror != 0 ||
1091 n_rcu_torture_boost_afferror != 0 ||
1092 n_rcu_torture_boost_failure != 0)
946 1093 cnt += sprintf(&page[cnt], " !!!");
947 1094 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
948 1095 if (i > 1) {
@@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg)
1094 1241}
1095 1242
1096 1243static inline void
1097rcu_torture_print_module_parms(char *tag)
1244rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
1098 1245{
1099 1246 printk(KERN_ALERT "%s" TORTURE_FLAG
1100 1247 "--- %s: nreaders=%d nfakewriters=%d "
1101 1248 "stat_interval=%d verbose=%d test_no_idle_hz=%d "
1102 1249 "shuffle_interval=%d stutter=%d irqreader=%d "
1103 "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
1250 "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
1251 "test_boost=%d/%d test_boost_interval=%d "
1252 "test_boost_duration=%d\n",
1104 1253 torture_type, tag, nrealreaders, nfakewriters,
1105 1254 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
1106 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
1255 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
1256 test_boost, cur_ops->can_boost,
1257 test_boost_interval, test_boost_duration);
1107 1258}
1108 1259
1109static struct notifier_block rcutorture_nb = {
1260static struct notifier_block rcutorture_shutdown_nb = {
1110 1261 .notifier_call = rcutorture_shutdown_notify,
1111 1262};
1112 1263
1264static void rcutorture_booster_cleanup(int cpu)
1265{
1266 struct task_struct *t;
1267
1268 if (boost_tasks[cpu] == NULL)
1269 return;
1270 mutex_lock(&boost_mutex);
1271 VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task");
1272 t = boost_tasks[cpu];
1273 boost_tasks[cpu] = NULL;
1274 mutex_unlock(&boost_mutex);
1275
1276 /* This must be outside of the mutex, otherwise deadlock! */
1277 kthread_stop(t);
1278}
1279
1280static int rcutorture_booster_init(int cpu)
1281{
1282 int retval;
1283
1284 if (boost_tasks[cpu] != NULL)
1285 return 0; /* Already created, nothing more to do. */
1286
1287 /* Don't allow time recalculation while creating a new task. */
1288 mutex_lock(&boost_mutex);
1289 VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
1290 boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL,
1291 "rcu_torture_boost");
1292 if (IS_ERR(boost_tasks[cpu])) {
1293 retval = PTR_ERR(boost_tasks[cpu]);
1294 VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
1295 n_rcu_torture_boost_ktrerror++;
1296 boost_tasks[cpu] = NULL;
1297 mutex_unlock(&boost_mutex);
1298 return retval;
1299 }
1300 kthread_bind(boost_tasks[cpu], cpu);
1301 wake_up_process(boost_tasks[cpu]);
1302 mutex_unlock(&boost_mutex);
1303 return 0;
1304}
1305
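
rcutorture_booster_init() uses the standard create/bind/wake sequence for a CPU-affine kthread; binding before the first wake_up_process() guarantees the thread's first activation already runs on the target CPU. A condensed sketch of the pattern (my_worker and start_pinned_worker are illustrative names):

    #include <linux/kthread.h>
    #include <linux/err.h>

    static int start_pinned_worker(int (*my_worker)(void *), int cpu)
    {
            struct task_struct *t;

            /* Created stopped, so we can still pin it before it runs. */
            t = kthread_create(my_worker, NULL, "my_worker/%d", cpu);
            if (IS_ERR(t))
                    return PTR_ERR(t);
            kthread_bind(t, cpu);
            wake_up_process(t);
            return 0;
    }
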
1306static int rcutorture_cpu_notify(struct notifier_block *self,
1307 unsigned long action, void *hcpu)
1308{
1309 long cpu = (long)hcpu;
1310
1311 switch (action) {
1312 case CPU_ONLINE:
1313 case CPU_DOWN_FAILED:
1314 (void)rcutorture_booster_init(cpu);
1315 break;
1316 case CPU_DOWN_PREPARE:
1317 rcutorture_booster_cleanup(cpu);
1318 break;
1319 default:
1320 break;
1321 }
1322 return NOTIFY_OK;
1323}
1324
1325static struct notifier_block rcutorture_cpu_nb = {
1326 .notifier_call = rcutorture_cpu_notify,
1327};
1328
1113 1329static void
1114 1330rcu_torture_cleanup(void)
1115 1331{
@@ -1127,7 +1343,7 @@ rcu_torture_cleanup(void)
1127 1343 }
1128 1344 fullstop = FULLSTOP_RMMOD;
1129 1345 mutex_unlock(&fullstop_mutex);
1130 unregister_reboot_notifier(&rcutorture_nb);
1346 unregister_reboot_notifier(&rcutorture_shutdown_nb);
1131 1347 if (stutter_task) {
1132 1348 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
1133 1349 kthread_stop(stutter_task);
@@ -1184,6 +1400,12 @@ rcu_torture_cleanup(void)
1184 1400 kthread_stop(fqs_task);
1185 1401 }
1186 1402 fqs_task = NULL;
1403 if ((test_boost == 1 && cur_ops->can_boost) ||
1404 test_boost == 2) {
1405 unregister_cpu_notifier(&rcutorture_cpu_nb);
1406 for_each_possible_cpu(i)
1407 rcutorture_booster_cleanup(i);
1408 }
1187 1409
1188 1410 /* Wait for all RCU callbacks to fire. */
1189 1411
@@ -1195,9 +1417,9 @@ rcu_torture_cleanup(void)
1195 1417 if (cur_ops->cleanup)
1196 1418 cur_ops->cleanup();
1197 1419 if (atomic_read(&n_rcu_torture_error))
1198 rcu_torture_print_module_parms("End of test: FAILURE");
1420 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
1199 1421 else
1200 rcu_torture_print_module_parms("End of test: SUCCESS");
1422 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
1201 1423}
1202 1424
1203 1425static int __init
@@ -1242,7 +1464,7 @@ rcu_torture_init(void)
1242 1464 nrealreaders = nreaders;
1243 1465 else
1244 1466 nrealreaders = 2 * num_online_cpus();
1245 rcu_torture_print_module_parms("Start of test");
1467 rcu_torture_print_module_parms(cur_ops, "Start of test");
1246 1468 fullstop = FULLSTOP_DONTSTOP;
1247 1469
1248 1470 /* Set up the freelist. */
@@ -1263,6 +1485,12 @@ rcu_torture_init(void)
1263 1485 atomic_set(&n_rcu_torture_free, 0);
1264 1486 atomic_set(&n_rcu_torture_mberror, 0);
1265 1487 atomic_set(&n_rcu_torture_error, 0);
1488 n_rcu_torture_boost_ktrerror = 0;
1489 n_rcu_torture_boost_rterror = 0;
1490 n_rcu_torture_boost_allocerror = 0;
1491 n_rcu_torture_boost_afferror = 0;
1492 n_rcu_torture_boost_failure = 0;
1493 n_rcu_torture_boosts = 0;
1266 1494 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
1267 1495 atomic_set(&rcu_torture_wcount[i], 0);
1268 1496 for_each_possible_cpu(cpu) {
@@ -1376,7 +1604,27 @@ rcu_torture_init(void)
1376 1604 goto unwind;
1377 1605 }
1378 1606 }
1379 register_reboot_notifier(&rcutorture_nb);
1607 if (test_boost_interval < 1)
1608 test_boost_interval = 1;
1609 if (test_boost_duration < 2)
1610 test_boost_duration = 2;
1611 if ((test_boost == 1 && cur_ops->can_boost) ||
1612 test_boost == 2) {
1613 int retval;
1614
1615 boost_starttime = jiffies + test_boost_interval * HZ;
1616 register_cpu_notifier(&rcutorture_cpu_nb);
1617 for_each_possible_cpu(i) {
1618 if (cpu_is_offline(i))
1619 continue; /* Heuristic: CPU can go offline. */
1620 retval = rcutorture_booster_init(i);
1621 if (retval < 0) {
1622 firsterr = retval;
1623 goto unwind;
1624 }
1625 }
1626 }
1627 register_reboot_notifier(&rcutorture_shutdown_nb);
1380 1628 mutex_unlock(&fullstop_mutex);
1381 1629 return 0;
1382 1630
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ccdc04c47981..d0ddfea6579d 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
67 67 .gpnum = -300, \
68 68 .completed = -300, \
69 69 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
70 .orphan_cbs_list = NULL, \
71 .orphan_cbs_tail = &structname.orphan_cbs_list, \
72 .orphan_qlen = 0, \
73 70 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
74 71 .n_force_qs = 0, \
75 72 .n_force_qs_ngp = 0, \
@@ -620,9 +617,17 @@ static void __init check_cpu_stall_init(void)
620 617static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
621 618{
622 619 if (rdp->gpnum != rnp->gpnum) {
623 rdp->qs_pending = 1;
624 rdp->passed_quiesc = 0;
620 /*
621 * If the current grace period is waiting for this CPU,
622 * set up to detect a quiescent state, otherwise don't
623 * go looking for one.
624 */
625 625 rdp->gpnum = rnp->gpnum;
626 if (rnp->qsmask & rdp->grpmask) {
627 rdp->qs_pending = 1;
628 rdp->passed_quiesc = 0;
629 } else
630 rdp->qs_pending = 0;
626 631 }
627 632}
628 633
@@ -681,6 +686,24 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
681 686
682 687 /* Remember that we saw this grace-period completion. */
683 688 rdp->completed = rnp->completed;
689
690 /*
691 * If we were in an extended quiescent state, we may have
692 * missed some grace periods that other CPUs handled on
693 * our behalf. Catch up with this state to avoid noting
694 * spurious new grace periods. If another grace period
695 * has started, then rnp->gpnum will have advanced, so
696 * we will detect this later on.
697 */
698 if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
699 rdp->gpnum = rdp->completed;
700
701 /*
702 * If RCU does not need a quiescent state from this CPU,
703 * then make sure that this CPU doesn't go looking for one.
704 */
705 if ((rnp->qsmask & rdp->grpmask) == 0)
706 rdp->qs_pending = 0;
684 707 }
685 708}
686 709
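
The new catch-up logic leans on ULONG_CMP_LT(), the kernel's wrap-safe counter comparison. Its definition (per the rcupdate.h of this era, stated here as an assumption) and the catch-up step in isolation, as a sketch:

    #include <limits.h>

    #define ULONG_CMP_LT(a, b)  (ULONG_MAX / 2 < (a) - (b))

    /*
     * If ->gpnum lags ->completed in modular arithmetic, this CPU
     * slept through whole grace periods; snap it forward so it does
     * not hunt for quiescent states on behalf of finished GPs.
     */
    static void catch_up_gpnum(unsigned long *gpnum, unsigned long completed)
    {
            if (ULONG_CMP_LT(*gpnum, completed))
                    *gpnum = completed;
    }
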
@@ -984,53 +1007,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
984 1007#ifdef CONFIG_HOTPLUG_CPU
985 1008
986 1009/*
987 * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
988 * specified flavor of RCU. The callbacks will be adopted by the next
989 * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
990 * comes first. Because this is invoked from the CPU_DYING notifier,
991 * irqs are already disabled.
1010 * Move a dying CPU's RCU callbacks to online CPU's callback list.
1011 * Synchronization is not required because this function executes
1012 * in stop_machine() context.
992 1013 */
993static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
1014static void rcu_send_cbs_to_online(struct rcu_state *rsp)
994 1015{
995 1016 int i;
1017 /* current DYING CPU is cleared in the cpu_online_mask */
1018 int receive_cpu = cpumask_any(cpu_online_mask);
996 1019 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1020 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
997 1021
998 1022 if (rdp->nxtlist == NULL)
999 1023 return; /* irqs disabled, so comparison is stable. */
1000 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
1001 *rsp->orphan_cbs_tail = rdp->nxtlist;
1002 rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
1024
1025 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
1026 receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1027 receive_rdp->qlen += rdp->qlen;
1028 receive_rdp->n_cbs_adopted += rdp->qlen;
1029 rdp->n_cbs_orphaned += rdp->qlen;
1030
1003 1031 rdp->nxtlist = NULL;
1004 1032 for (i = 0; i < RCU_NEXT_SIZE; i++)
1005 1033 rdp->nxttail[i] = &rdp->nxtlist;
1006 rsp->orphan_qlen += rdp->qlen;
1007 rdp->n_cbs_orphaned += rdp->qlen;
1008 1034 rdp->qlen = 0;
1009 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
1010}
1011
1012/*
1013 * Adopt previously orphaned RCU callbacks.
1014 */
1015static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
1016{
1017 unsigned long flags;
1018 struct rcu_data *rdp;
1019
1020 raw_spin_lock_irqsave(&rsp->onofflock, flags);
1021 rdp = this_cpu_ptr(rsp->rda);
1022 if (rsp->orphan_cbs_list == NULL) {
1023 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
1024 return;
1025 }
1026 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
1027 rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
1028 rdp->qlen += rsp->orphan_qlen;
1029 rdp->n_cbs_adopted += rsp->orphan_qlen;
1030 rsp->orphan_cbs_list = NULL;
1031 rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
1032 rsp->orphan_qlen = 0;
1033 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
1034 1035}
1035 1036
1036 1037/*
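
rcu_send_cbs_to_online() above is an O(1) splice of one tail-pointer-managed singly linked list onto another; no per-callback work is done. The same pointer moves in a self-contained sketch (struct names are illustrative):

    struct node {
            struct node *next;
    };

    struct cb_list {
            struct node *head;      /* first callback, or NULL */
            struct node **tail;     /* points to the terminating NULL */
    };

    /* Append all of "src" to "dst" and reinitialize "src" to empty,
     * mirroring the ->nxtlist/->nxttail[RCU_NEXT_TAIL] moves above. */
    static void splice_cbs(struct cb_list *dst, struct cb_list *src)
    {
            if (src->head == NULL)
                    return;
            *dst->tail = src->head;
            dst->tail = src->tail;
            src->head = NULL;
            src->tail = &src->head;
    }
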
@@ -1081,8 +1082,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1081 1082 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1082 1083 if (need_report & RCU_OFL_TASKS_EXP_GP)
1083 1084 rcu_report_exp_rnp(rsp, rnp);
1084
1085 rcu_adopt_orphan_cbs(rsp);
1086 1085}
1087 1086
1088 1087/*
@@ -1100,11 +1099,7 @@ static void rcu_offline_cpu(int cpu)
1100 1099
1101 1100#else /* #ifdef CONFIG_HOTPLUG_CPU */
1102 1101
1103static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
1104{
1105}
1106
1107static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
1102static void rcu_send_cbs_to_online(struct rcu_state *rsp)
1108 1103{
1109 1104}
1110 1105
@@ -1440,22 +1435,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1440 1435 */
1441 1436 local_irq_save(flags);
1442 1437 rdp = this_cpu_ptr(rsp->rda);
1443 rcu_process_gp_end(rsp, rdp);
1444 check_for_new_grace_period(rsp, rdp);
1445 1438
1446 1439 /* Add the callback to our list. */
1447 1440 *rdp->nxttail[RCU_NEXT_TAIL] = head;
1448 1441 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1449 1442
1450 /* Start a new grace period if one not already started. */
1451 if (!rcu_gp_in_progress(rsp)) {
1452 unsigned long nestflag;
1453 struct rcu_node *rnp_root = rcu_get_root(rsp);
1454
1455 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1456 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
1457 }
1458
1459 /* 1443 /*
1460 * Force the grace period if too many callbacks or too long waiting. 1444 * Force the grace period if too many callbacks or too long waiting.
1461 * Enforce hysteresis, and don't invoke force_quiescent_state() 1445 * Enforce hysteresis, and don't invoke force_quiescent_state()
@@ -1464,12 +1448,27 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1464 1448 * is the only one waiting for a grace period to complete.
1465 1449 */
1466 1450 if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1467 rdp->blimit = LONG_MAX;
1468 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1469 *rdp->nxttail[RCU_DONE_TAIL] != head)
1470 force_quiescent_state(rsp, 0);
1471 rdp->n_force_qs_snap = rsp->n_force_qs;
1472 rdp->qlen_last_fqs_check = rdp->qlen;
1451
1452 /* Are we ignoring a completed grace period? */
1453 rcu_process_gp_end(rsp, rdp);
1454 check_for_new_grace_period(rsp, rdp);
1455
1456 /* Start a new grace period if one not already started. */
1457 if (!rcu_gp_in_progress(rsp)) {
1458 unsigned long nestflag;
1459 struct rcu_node *rnp_root = rcu_get_root(rsp);
1460
1461 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1462 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */
1463 } else {
1464 /* Give the grace period a kick. */
1465 rdp->blimit = LONG_MAX;
1466 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1467 *rdp->nxttail[RCU_DONE_TAIL] != head)
1468 force_quiescent_state(rsp, 0);
1469 rdp->n_force_qs_snap = rsp->n_force_qs;
1470 rdp->qlen_last_fqs_check = rdp->qlen;
1471 }
1473 1472 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1474 1473 force_quiescent_state(rsp, 1);
1475 1474 local_irq_restore(flags);
@@ -1699,13 +1698,12 @@ static void _rcu_barrier(struct rcu_state *rsp,
1699 1698 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
1700 1699 * might complete its grace period before all of the other CPUs
1701 1700 * did their increment, causing this function to return too
1702 * early.
1701 * early. Note that on_each_cpu() disables irqs, which prevents
1702 * any CPUs from coming online or going offline until each online
1703 * CPU has queued its RCU-barrier callback.
1703 1704 */
1704 1705 atomic_set(&rcu_barrier_cpu_count, 1);
1705 preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
1706 rcu_adopt_orphan_cbs(rsp);
1707 1706 on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
1708 preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
1709 1707 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
1710 1708 complete(&rcu_barrier_completion);
1711 1709 wait_for_completion(&rcu_barrier_completion);
@@ -1831,18 +1829,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1831 1829 case CPU_DYING:
1832 1830 case CPU_DYING_FROZEN:
1833 1831 /*
1834 * preempt_disable() in _rcu_barrier() prevents stop_machine(),
1835 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
1836 * returns, all online cpus have queued rcu_barrier_func().
1837 * The dying CPU clears its cpu_online_mask bit and
1838 * moves all of its RCU callbacks to ->orphan_cbs_list
1839 * in the context of stop_machine(), so subsequent calls
1840 * to _rcu_barrier() will adopt these callbacks and only
1841 * then queue rcu_barrier_func() on all remaining CPUs.
1832 * The whole machine is "stopped" except this CPU, so we can
1833 * touch any data without introducing corruption. We send the
1834 * dying CPU's callbacks to an arbitrarily chosen online CPU.
1842 1835 */
1843 rcu_send_cbs_to_orphanage(&rcu_bh_state);
1844 rcu_send_cbs_to_orphanage(&rcu_sched_state);
1845 rcu_preempt_send_cbs_to_orphanage();
1836 rcu_send_cbs_to_online(&rcu_bh_state);
1837 rcu_send_cbs_to_online(&rcu_sched_state);
1838 rcu_preempt_send_cbs_to_online();
1846 1839 break;
1847 1840 case CPU_DEAD:
1848 1841 case CPU_DEAD_FROZEN:
@@ -1880,8 +1873,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
1880 1873{
1881 1874 int i;
1882 1875
1883 for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
1876 for (i = NUM_RCU_LVLS - 1; i > 0; i--)
1884 1877 rsp->levelspread[i] = CONFIG_RCU_FANOUT;
1878 rsp->levelspread[0] = RCU_FANOUT_LEAF;
1885 1879}
1886 1880#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
1887 1881static void __init rcu_init_levelspread(struct rcu_state *rsp)
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 91d4170c5c13..e8f057e44e3e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -31,46 +31,51 @@
31 31/*
32 32 * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
33 33 * In theory, it should be possible to add more levels straightforwardly.
34 * In practice, this has not been tested, so there is probably some
35 * bug somewhere.
34 * In practice, this did work well going from three levels to four.
35 * Of course, your mileage may vary.
36 36 */
37 37#define MAX_RCU_LVLS 4
38#define RCU_FANOUT (CONFIG_RCU_FANOUT)
39#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT)
40#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT)
41#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT)
42
43#if NR_CPUS <= RCU_FANOUT
38#if CONFIG_RCU_FANOUT > 16
39#define RCU_FANOUT_LEAF 16
40#else /* #if CONFIG_RCU_FANOUT > 16 */
41#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
42#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
43#define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
44#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
45#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
46#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
47
48#if NR_CPUS <= RCU_FANOUT_1
44 49# define NUM_RCU_LVLS 1
45 50# define NUM_RCU_LVL_0 1
46 51# define NUM_RCU_LVL_1 (NR_CPUS)
47 52# define NUM_RCU_LVL_2 0
48 53# define NUM_RCU_LVL_3 0
49 54# define NUM_RCU_LVL_4 0
50#elif NR_CPUS <= RCU_FANOUT_SQ
55#elif NR_CPUS <= RCU_FANOUT_2
51 56# define NUM_RCU_LVLS 2
52 57# define NUM_RCU_LVL_0 1
53# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
58# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
54 59# define NUM_RCU_LVL_2 (NR_CPUS)
55 60# define NUM_RCU_LVL_3 0
56 61# define NUM_RCU_LVL_4 0
57#elif NR_CPUS <= RCU_FANOUT_CUBE
62#elif NR_CPUS <= RCU_FANOUT_3
58 63# define NUM_RCU_LVLS 3
59 64# define NUM_RCU_LVL_0 1
60# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
65# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
61# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
66# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
62# define NUM_RCU_LVL_3 NR_CPUS
67# define NUM_RCU_LVL_3 (NR_CPUS)
63 68# define NUM_RCU_LVL_4 0
64#elif NR_CPUS <= RCU_FANOUT_FOURTH
69#elif NR_CPUS <= RCU_FANOUT_4
65 70# define NUM_RCU_LVLS 4
66 71# define NUM_RCU_LVL_0 1
67# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
72# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
68# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
73# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
69# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
74# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
70# define NUM_RCU_LVL_4 NR_CPUS
75# define NUM_RCU_LVL_4 (NR_CPUS)
71 76#else
72 77# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
73#endif /* #if (NR_CPUS) <= RCU_FANOUT */
78#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
74 79
75 80#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
76 81#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
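
To see the new ladder in action: with CONFIG_RCU_FANOUT=64 and NR_CPUS=4096 the leaf fanout is capped at 16, RCU_FANOUT_2 is 16*64=1024, and the three-level branch is taken. A quick userspace check of the arithmetic (values assumed for illustration):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned long nr_cpus = 4096, fanout = 64, leaf = 16;
            unsigned long f1 = leaf;                        /* RCU_FANOUT_1 = 16   */
            unsigned long f2 = f1 * fanout;                 /* RCU_FANOUT_2 = 1024 */
            unsigned long lvl1 = DIV_ROUND_UP(nr_cpus, f2); /* 4   */
            unsigned long lvl2 = DIV_ROUND_UP(nr_cpus, f1); /* 256 */

            /* The per-CPU level is rcu_data, not rcu_node, so it drops
             * out of NUM_RCU_NODES = RCU_SUM - NR_CPUS. */
            printf("rcu_node structures: %lu\n", 1 + lvl1 + lvl2); /* 261 */
            return 0;
    }
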
@@ -203,8 +208,8 @@ struct rcu_data {
203 208 long qlen_last_fqs_check;
204 209 /* qlen at last check for QS forcing */
205 210 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
206 unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */
207 unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */
211 unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
212 unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */
208 213 unsigned long n_force_qs_snap;
209 214 /* did other CPU force QS recently? */
210 215 long blimit; /* Upper limit on a processed batch */
@@ -309,15 +314,7 @@ struct rcu_state {
309 314 /* End of fields guarded by root rcu_node's lock. */
310 315
311 316 raw_spinlock_t onofflock; /* exclude on/offline and */
312 /* starting new GP. Also */
317 /* starting new GP. */
313 /* protects the following */
314 /* orphan_cbs fields. */
315 struct rcu_head *orphan_cbs_list; /* list of rcu_head structs */
316 /* orphaned by all CPUs in */
317 /* a given leaf rcu_node */
318 /* going offline. */
319 struct rcu_head **orphan_cbs_tail; /* And tail pointer. */
320 long orphan_qlen; /* Number of orphaned cbs. */
321 318 raw_spinlock_t fqslock; /* Only one task forcing */
322 319 /* quiescent states. */
323 320 unsigned long jiffies_force_qs; /* Time at which to invoke */
@@ -390,7 +387,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
390 387static int rcu_preempt_pending(int cpu);
391 388static int rcu_preempt_needs_cpu(int cpu);
392 389static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
393static void rcu_preempt_send_cbs_to_orphanage(void);
390static void rcu_preempt_send_cbs_to_online(void);
394 391static void __init __rcu_init_preempt(void);
395 392static void rcu_needs_cpu_flush(void);
396 393
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 71a4147473f9..a3638710dc67 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -25,6 +25,7 @@
25 25 */
26 26
27 27#include <linux/delay.h>
28#include <linux/stop_machine.h>
28 29
29 30/*
30 31 * Check the RCU kernel configuration parameters and print informative
@@ -773,11 +774,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
773 774}
774 775
775 776/*
776 * Move preemptable RCU's callbacks to ->orphan_cbs_list.
777 * Move preemptable RCU's callbacks from dying CPU to other online CPU.
777 778 */
778static void rcu_preempt_send_cbs_to_orphanage(void)
779static void rcu_preempt_send_cbs_to_online(void)
779 780{
780 rcu_send_cbs_to_orphanage(&rcu_preempt_state);
781 rcu_send_cbs_to_online(&rcu_preempt_state);
781 782}
782 783
783 784/*
@@ -1001,7 +1002,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1001 1002/*
1002 1003 * Because there is no preemptable RCU, there are no callbacks to move.
1003 1004 */
1004static void rcu_preempt_send_cbs_to_orphanage(void)
1005static void rcu_preempt_send_cbs_to_online(void)
1005 1006{
1006 1007}
1007 1008
@@ -1014,6 +1015,132 @@ static void __init __rcu_init_preempt(void)
1014 1015
1015 1016#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
1016 1017
1018#ifndef CONFIG_SMP
1019
1020void synchronize_sched_expedited(void)
1021{
1022 cond_resched();
1023}
1024EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1025
1026#else /* #ifndef CONFIG_SMP */
1027
1028static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
1029static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
1030
1031static int synchronize_sched_expedited_cpu_stop(void *data)
1032{
1033 /*
1034 * There must be a full memory barrier on each affected CPU
1035 * between the time that try_stop_cpus() is called and the
1036 * time that it returns.
1037 *
1038 * In the current initial implementation of cpu_stop, the
1039 * above condition is already met when the control reaches
1040 * this point and the following smp_mb() is not strictly
1041 * necessary. Do smp_mb() anyway for documentation and
1042 * robustness against future implementation changes.
1043 */
1044 smp_mb(); /* See above comment block. */
1045 return 0;
1046}
1047
1048/*
1049 * Wait for an rcu-sched grace period to elapse, but use "big hammer"
1050 * approach to force grace period to end quickly. This consumes
1051 * significant time on all CPUs, and is thus not recommended for
1052 * any sort of common-case code.
1053 *
1054 * Note that it is illegal to call this function while holding any
1055 * lock that is acquired by a CPU-hotplug notifier. Failing to
1056 * observe this restriction will result in deadlock.
1057 *
1058 * This implementation can be thought of as an application of ticket
1059 * locking to RCU, with sync_sched_expedited_started and
1060 * sync_sched_expedited_done taking on the roles of the halves
1061 * of the ticket-lock word. Each task atomically increments
1062 * sync_sched_expedited_started upon entry, snapshotting the old value,
1063 * then attempts to stop all the CPUs. If this succeeds, then each
1064 * CPU will have executed a context switch, resulting in an RCU-sched
1065 * grace period. We are then done, so we use atomic_cmpxchg() to
1066 * update sync_sched_expedited_done to match our snapshot -- but
1067 * only if someone else has not already advanced past our snapshot.
1068 *
1069 * On the other hand, if try_stop_cpus() fails, we check the value
1070 * of sync_sched_expedited_done. If it has advanced past our
1071 * initial snapshot, then someone else must have forced a grace period
1072 * some time after we took our snapshot. In this case, our work is
1073 * done for us, and we can simply return. Otherwise, we try again,
1074 * but keep our initial snapshot for purposes of checking for someone
1075 * doing our work for us.
1076 *
1077 * If we fail too many times in a row, we fall back to synchronize_sched().
1078 */
1079void synchronize_sched_expedited(void)
1080{
1081 int firstsnap, s, snap, trycount = 0;
1082
1083 /* Note that atomic_inc_return() implies full memory barrier. */
1084 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
1085 get_online_cpus();
1086
1087 /*
1088 * Each pass through the following loop attempts to force a
1089 * context switch on each CPU.
1090 */
1091 while (try_stop_cpus(cpu_online_mask,
1092 synchronize_sched_expedited_cpu_stop,
1093 NULL) == -EAGAIN) {
1094 put_online_cpus();
1095
1096 /* No joy, try again later. Or just synchronize_sched(). */
1097 if (trycount++ < 10)
1098 udelay(trycount * num_online_cpus());
1099 else {
1100 synchronize_sched();
1101 return;
1102 }
1103
1104 /* Check to see if someone else did our work for us. */
1105 s = atomic_read(&sync_sched_expedited_done);
1106 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
1107 smp_mb(); /* ensure test happens before caller kfree */
1108 return;
1109 }
1110
1111 /*
1112 * Refetching sync_sched_expedited_started allows later
1113 * callers to piggyback on our grace period. We subtract
1114 * 1 to get the same token that the last incrementer got.
1115 * We retry after they started, so our grace period works
1116 * for them, and they started after our first try, so their
1117 * grace period works for us.
1118 */
1119 get_online_cpus();
1120 snap = atomic_read(&sync_sched_expedited_started) - 1;
1121 smp_mb(); /* ensure read is before try_stop_cpus(). */
1122 }
1123
1124 /*
1125 * Everyone up to our most recent fetch is covered by our grace
1126 * period. Update the counter, but only if our work is still
1127 * relevant -- which it won't be if someone who started later
1128 * than we did beat us to the punch.
1129 */
1130 do {
1131 s = atomic_read(&sync_sched_expedited_done);
1132 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
1133 smp_mb(); /* ensure test happens before caller kfree */
1134 break;
1135 }
1136 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
1137
1138 put_online_cpus();
1139}
1140EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1141
1142#endif /* #else #ifndef CONFIG_SMP */
1143
1017 1144#if !defined(CONFIG_RCU_FAST_NO_HZ)
1018 1145
1019 1146/*
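
The started/done pair above is ticket locking applied to grace periods: a caller whose snapshot is already covered by "done" can return without doing any work. A compressed userspace analogue using C11 atomics (do_grace_period and try_exclusive stand in for the try_stop_cpus() step; both names are illustrative):

    #include <stdatomic.h>

    static atomic_uint started, done;

    void expedited(int (*try_exclusive)(void), void (*do_grace_period)(void))
    {
            unsigned int snap = atomic_fetch_add(&started, 1) + 1;
            unsigned int s;

            for (;;) {
                    if (try_exclusive()) {
                            do_grace_period();
                            /* Publish our ticket, never moving "done"
                             * backwards past a later caller's update
                             * (cf. the atomic_cmpxchg() loop above). */
                            s = atomic_load(&done);
                            while ((int)(snap - s) > 0 &&
                                   !atomic_compare_exchange_weak(&done, &s, snap))
                                    ;
                            return;
                    }
                    /* Someone else's pass may already cover our snapshot. */
                    if ((int)(atomic_load(&done) - snap) >= 0)
                            return;
            }
    }
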
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index d15430b9d122..c8e97853b970 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -166,13 +166,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
166 166
167 167 gpnum = rsp->gpnum;
168 168 seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
169 "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
169 "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
170 170 rsp->completed, gpnum, rsp->signaled,
171 171 (long)(rsp->jiffies_force_qs - jiffies),
172 172 (int)(jiffies & 0xffff),
173 173 rsp->n_force_qs, rsp->n_force_qs_ngp,
174 174 rsp->n_force_qs - rsp->n_force_qs_ngp,
175 rsp->n_force_qs_lh, rsp->orphan_qlen);
175 rsp->n_force_qs_lh);
176 176 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
177 177 if (rnp->level != level) {
178 178 seq_puts(m, "\n");
@@ -300,7 +300,7 @@ static const struct file_operations rcu_pending_fops = {
300 300
301 301static struct dentry *rcudir;
302 302
303static int __init rcuclassic_trace_init(void)
303static int __init rcutree_trace_init(void)
304 304{
305 305 struct dentry *retval;
306 306
@@ -337,14 +337,14 @@ free_out:
337 337 return 1;
338 338}
339 339
340static void __exit rcuclassic_trace_cleanup(void)
340static void __exit rcutree_trace_cleanup(void)
341 341{
342 342 debugfs_remove_recursive(rcudir);
343 343}
344 344
345 345
346module_init(rcuclassic_trace_init);
346module_init(rcutree_trace_init);
347module_exit(rcuclassic_trace_cleanup);
347module_exit(rcutree_trace_cleanup);
348 348
349 349MODULE_AUTHOR("Paul E. McKenney");
350 350MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
diff --git a/kernel/sched.c b/kernel/sched.c
index 297d1a0eedb0..04949089e760 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -75,9 +75,11 @@
75 75
76 76#include <asm/tlb.h>
77 77#include <asm/irq_regs.h>
78#include <asm/mutex.h>
78 79
79 80#include "sched_cpupri.h"
80 81#include "workqueue_sched.h"
82#include "sched_autogroup.h"
81 83
82 84#define CREATE_TRACE_POINTS
83 85#include <trace/events/sched.h>
@@ -253,6 +255,8 @@ struct task_group {
253 255 /* runqueue "owned" by this group on each cpu */
254 256 struct cfs_rq **cfs_rq;
255 257 unsigned long shares;
258
259 atomic_t load_weight;
256 260#endif
257 261
258 262#ifdef CONFIG_RT_GROUP_SCHED
@@ -268,24 +272,19 @@ struct task_group {
268 272 struct task_group *parent;
269 273 struct list_head siblings;
270 274 struct list_head children;
275
276#ifdef CONFIG_SCHED_AUTOGROUP
277 struct autogroup *autogroup;
278#endif
271 279};
272 280
273 281#define root_task_group init_task_group
274 282
275/* task_group_lock serializes add/remove of task groups and also changes to
276 * a task group's cpu shares.
277 */
283/* task_group_lock serializes the addition/removal of task groups */
278 284static DEFINE_SPINLOCK(task_group_lock);
279 285
280 286#ifdef CONFIG_FAIR_GROUP_SCHED
281 287
282#ifdef CONFIG_SMP
283static int root_task_group_empty(void)
284{
285 return list_empty(&root_task_group.children);
286}
287#endif
288
289 288# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
290 289
291 290/*
@@ -342,6 +341,7 @@ struct cfs_rq {
342 341 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
343 342 * list is used during load balance.
344 343 */
344 int on_list;
345 345 struct list_head leaf_cfs_rq_list;
346 346 struct task_group *tg; /* group that "owns" this runqueue */
347 347
@@ -360,14 +360,17 @@ struct cfs_rq {
360 360 unsigned long h_load;
361 361
362 362 /*
363 * this cpu's part of tg->shares
363 * Maintaining per-cpu shares distribution for group scheduling
364 *
365 * load_stamp is the last time we updated the load average
366 * load_last is the last time we updated the load average and saw load
367 * load_unacc_exec_time is currently unaccounted execution time
364 368 */
365 unsigned long shares;
369 u64 load_avg;
370 u64 load_period;
371 u64 load_stamp, load_last, load_unacc_exec_time;
366 372
367 /*
373 unsigned long load_contribution;
368 * load.weight at the time we set shares
369 */
370 unsigned long rq_weight;
371 374#endif
372 375#endif
373 376};
@@ -605,11 +608,14 @@ static inline int cpu_of(struct rq *rq)
605 608 */
606 609static inline struct task_group *task_group(struct task_struct *p)
607 610{
611 struct task_group *tg;
608 612 struct cgroup_subsys_state *css;
609 613
610 614 css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
611 615 lockdep_is_held(&task_rq(p)->lock));
612 return container_of(css, struct task_group, css);
616 tg = container_of(css, struct task_group, css);
617
618 return autogroup_task_group(p, tg);
613 619}
614 620
615 621/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -793,20 +799,6 @@ late_initcall(sched_init_debug);
793 799const_debug unsigned int sysctl_sched_nr_migrate = 32;
794 800
795 801/*
796 * ratelimit for updating the group shares.
797 * default: 0.25ms
798 */
799unsigned int sysctl_sched_shares_ratelimit = 250000;
800unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
801
802/*
803 * Inject some fuzzyness into changing the per-cpu group shares
804 * this avoids remote rq-locks at the expense of fairness.
805 * default: 4
806 */
807unsigned int sysctl_sched_shares_thresh = 4;
808
809/*
810 802 * period over which we average the RT time consumption, measured
811 803 * in ms.
812 804 *
@@ -1355,6 +1347,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
1355 1347 lw->inv_weight = 0;
1356 1348}
1357 1349
1350static inline void update_load_set(struct load_weight *lw, unsigned long w)
1351{
1352 lw->weight = w;
1353 lw->inv_weight = 0;
1354}
1355
1358 1356/*
1359 1357 * To aid in avoiding the subversion of "niceness" due to uneven distribution
1360 1358 * of tasks with abnormal "nice" values across CPUs the contribution that
@@ -1543,101 +1541,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
1543 1541
1544 1542#ifdef CONFIG_FAIR_GROUP_SCHED
1545 1543
1546static __read_mostly unsigned long __percpu *update_shares_data;
1547
1548static void __set_se_shares(struct sched_entity *se, unsigned long shares);
1549
1550/*
1551 * Calculate and set the cpu's group shares.
1552 */
1553static void update_group_shares_cpu(struct task_group *tg, int cpu,
1554 unsigned long sd_shares,
1555 unsigned long sd_rq_weight,
1556 unsigned long *usd_rq_weight)
1557{
1558 unsigned long shares, rq_weight;
1559 int boost = 0;
1560
1561 rq_weight = usd_rq_weight[cpu];
1562 if (!rq_weight) {
1563 boost = 1;
1564 rq_weight = NICE_0_LOAD;
1565 }
1566
1567 /*
1568 * \Sum_j shares_j * rq_weight_i
1569 * shares_i = -----------------------------
1570 * \Sum_j rq_weight_j
1571 */
1572 shares = (sd_shares * rq_weight) / sd_rq_weight;
1573 shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
1574
1575 if (abs(shares - tg->se[cpu]->load.weight) >
1576 sysctl_sched_shares_thresh) {
1577 struct rq *rq = cpu_rq(cpu);
1578 unsigned long flags;
1579
1580 raw_spin_lock_irqsave(&rq->lock, flags);
1581 tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
1582 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1583 __set_se_shares(tg->se[cpu], shares);
1584 raw_spin_unlock_irqrestore(&rq->lock, flags);
1585 }
1586}
1587
1588/*
1589 * Re-compute the task group their per cpu shares over the given domain.
1590 * This needs to be done in a bottom-up fashion because the rq weight of a
1591 * parent group depends on the shares of its child groups.
1592 */
1593static int tg_shares_up(struct task_group *tg, void *data)
1594{
1595 unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
1596 unsigned long *usd_rq_weight;
1597 struct sched_domain *sd = data;
1598 unsigned long flags;
1599 int i;
1600
1601 if (!tg->se[0])
1602 return 0;
1603
1604 local_irq_save(flags);
1605 usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
1606
1607 for_each_cpu(i, sched_domain_span(sd)) {
1608 weight = tg->cfs_rq[i]->load.weight;
1609 usd_rq_weight[i] = weight;
1610
1611 rq_weight += weight;
1612 /*
1613 * If there are currently no tasks on the cpu pretend there
1614 * is one of average load so that when a new task gets to
1615 * run here it will not get delayed by group starvation.
1616 */
1617 if (!weight)
1618 weight = NICE_0_LOAD;
1619
1620 sum_weight += weight;
1621 shares += tg->cfs_rq[i]->shares;
1622 }
1623
1624 if (!rq_weight)
1625 rq_weight = sum_weight;
1626
1627 if ((!shares && rq_weight) || shares > tg->shares)
1628 shares = tg->shares;
1629
1630 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
1631 shares = tg->shares;
1632
1633 for_each_cpu(i, sched_domain_span(sd))
1634 update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
1635
1636 local_irq_restore(flags);
1637
1638 return 0;
1639}
1640
1641 1544/*
1642 1545 * Compute the cpu's hierarchical load factor for each task group.
1643 1546 * This needs to be done in a top-down fashion because the load of a child
@@ -1652,7 +1555,7 @@ static int tg_load_down(struct task_group *tg, void *data)
1652 1555 load = cpu_rq(cpu)->load.weight;
1653 1556 } else {
1654 1557 load = tg->parent->cfs_rq[cpu]->h_load;
1655 load *= tg->cfs_rq[cpu]->shares;
1558 load *= tg->se[cpu]->load.weight;
1656 1559 load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
1657 1560 }
1658 1561
@@ -1661,34 +1564,11 @@ static int tg_load_down(struct task_group *tg, void *data)
1661 1564 return 0;
1662 1565}
1663 1566
1664static void update_shares(struct sched_domain *sd)
1665{
1666 s64 elapsed;
1667 u64 now;
1668
1669 if (root_task_group_empty())
1670 return;
1671
1672 now = local_clock();
1673 elapsed = now - sd->last_update;
1674
1675 if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
1676 sd->last_update = now;
1677 walk_tg_tree(tg_nop, tg_shares_up, sd);
1678 }
1679}
1680
1681 1567static void update_h_load(long cpu)
1682 1568{
1683 1569 walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
1684 1570}
1685 1571
1686#else
1687
1688static inline void update_shares(struct sched_domain *sd)
1689{
1690}
1691
1692 1572#endif
1693 1573
1694 1574#ifdef CONFIG_PREEMPT
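
tg_load_down() now weights the parent's hierarchical load by the group's scheduling-entity weight instead of the removed per-cpu ->shares. The recurrence with concrete numbers, as a sketch (numbers chosen only for illustration):

    /*
     * h_load(child) = h_load(parent) * weight(child se on this cpu)
     *                 / (weight of parent cfs_rq on this cpu + 1)
     *
     * e.g. parent h_load 2048, child entity weight 1024, parent rq
     * weight 3072: the child is credited roughly a third, 682.
     */
    static unsigned long child_h_load(unsigned long parent_h_load,
                                      unsigned long se_weight,
                                      unsigned long parent_rq_weight)
    {
            return parent_h_load * se_weight / (parent_rq_weight + 1);
    }
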
@@ -1810,15 +1690,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1810 1690
1811 1691#endif
1812 1692
1813#ifdef CONFIG_FAIR_GROUP_SCHED
1814static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
1815{
1816#ifdef CONFIG_SMP
1817 cfs_rq->shares = shares;
1818#endif
1819}
1820#endif
1821
1822 1693static void calc_load_account_idle(struct rq *this_rq);
1823 1694static void update_sysctl(void);
1824 1695static int get_update_sysctl_factor(void);
@@ -2063,6 +1934,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
2063 1934#include "sched_idletask.c"
2064 1935#include "sched_fair.c"
2065 1936#include "sched_rt.c"
1937#include "sched_autogroup.c"
2066 1938#include "sched_stoptask.c"
2067 1939#ifdef CONFIG_SCHED_DEBUG
2068 1940# include "sched_debug.c"
@@ -2255,10 +2127,8 @@ static int migration_cpu_stop(void *data);
2255 2127 * The task's runqueue lock must be held.
2256 2128 * Returns true if you have to wait for migration thread.
2257 2129 */
2258static bool migrate_task(struct task_struct *p, int dest_cpu)
2130static bool migrate_task(struct task_struct *p, struct rq *rq)
2259 2131{
2260 struct rq *rq = task_rq(p);
2261
2262 2132 /*
2263 2133 * If the task is not on a runqueue (and not running), then
2264 2134 * the next wake-up will properly place the task.
@@ -2438,18 +2308,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
2438 2308 return dest_cpu;
2439 2309
2440 2310 /* No more Mr. Nice Guy. */
2441 if (unlikely(dest_cpu >= nr_cpu_ids)) {
2442 dest_cpu = cpuset_cpus_allowed_fallback(p);
2443 /*
2444 * Don't tell them about moving exiting tasks or
2445 * kernel threads (both mm NULL), since they never
2446 * leave kernel.
2447 */
2448 if (p->mm && printk_ratelimit()) {
2449 printk(KERN_INFO "process %d (%s) no "
2450 "longer affine to cpu%d\n",
2451 task_pid_nr(p), p->comm, cpu);
2452 }
2311 dest_cpu = cpuset_cpus_allowed_fallback(p);
2312 /*
2313 * Don't tell them about moving exiting tasks or
2314 * kernel threads (both mm NULL), since they never
2315 * leave kernel.
2316 */
2317 if (p->mm && printk_ratelimit()) {
2318 printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
2319 task_pid_nr(p), p->comm, cpu);
2453 2320 }
2454 2321
2455 2322 return dest_cpu;
@@ -2785,7 +2652,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
2785 2652 /* Want to start with kernel preemption disabled. */
2786 2653 task_thread_info(p)->preempt_count = 1;
2787 2654#endif
2655#ifdef CONFIG_SMP
2788 2656 plist_node_init(&p->pushable_tasks, MAX_PRIO);
2657#endif
2789 2658
2790 2659 put_cpu();
2791 2660}
@@ -3549,7 +3418,7 @@ void sched_exec(void)
3549 3418 * select_task_rq() can race against ->cpus_allowed
3550 3419 */
3551 3420 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
3552 likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
3421 likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
3553 3422 struct migration_arg arg = { p, dest_cpu };
3554 3423
3555 3424 task_rq_unlock(rq, &flags);
@@ -4214,7 +4083,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
4214 4083 if (task_thread_info(rq->curr) != owner || need_resched())
4215 4084 return 0;
4216 4085
4217 cpu_relax();
4086 arch_mutex_cpu_relax();
4218 4087 }
4219 4088
4220 4089 return 1;
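
The switch to arch_mutex_cpu_relax() lets an architecture opt out of cpu_relax() in this spin loop; on s390, cpu_relax() yields to the hypervisor, which is far too expensive per iteration. The override shape, sketched under that assumption:

    /* Generic fallback, used unless an architecture overrides it: */
    #ifndef arch_mutex_cpu_relax
    #define arch_mutex_cpu_relax()  cpu_relax()
    #endif

    /* arch/s390/include/asm/mutex.h instead supplies (sketch):
     *   #define arch_mutex_cpu_relax()  barrier()
     * a plain compiler barrier that keeps the spin tight rather than
     * taking a costly hypervisor yield on every pass. */
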
@@ -4526,7 +4395,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
4526 4395 * This waits for either a completion of a specific task to be signaled or for a
4527 4396 * specified timeout to expire. It is interruptible. The timeout is in jiffies.
4528 4397 */
4529unsigned long __sched
4398long __sched
4530 4399wait_for_completion_interruptible_timeout(struct completion *x,
4531 4400 unsigned long timeout)
4532 4401{
@@ -4559,7 +4428,7 @@ EXPORT_SYMBOL(wait_for_completion_killable);
4559 4428 * signaled or for a specified timeout to expire. It can be
4560 4429 * interrupted by a kill signal. The timeout is in jiffies.
4561 4430 */
4562unsigned long __sched
4431long __sched
4563 4432wait_for_completion_killable_timeout(struct completion *x,
4564 4433 unsigned long timeout)
4565 4434{
@@ -4901,7 +4770,7 @@ static bool check_same_owner(struct task_struct *p)
4901 4770}
4902 4771
4903 4772static int __sched_setscheduler(struct task_struct *p, int policy,
4904 struct sched_param *param, bool user)
4773 const struct sched_param *param, bool user)
4905 4774{
4906 4775 int retval, oldprio, oldpolicy = -1, on_rq, running;
4907 4776 unsigned long flags;
@@ -5056,7 +4925,7 @@ recheck:
5056 4925 * NOTE that the task may be already dead.
5057 4926 */
5058 4927int sched_setscheduler(struct task_struct *p, int policy,
5059 struct sched_param *param)
4928 const struct sched_param *param)
5060 4929{
5061 4930 return __sched_setscheduler(p, policy, param, true);
5062 4931}
@@ -5074,7 +4943,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
5074 4943 * but our caller might not have that capability.
5075 4944 */
5076 4945int sched_setscheduler_nocheck(struct task_struct *p, int policy,
5077 struct sched_param *param)
4946 const struct sched_param *param)
5078 4947{
5079 4948 return __sched_setscheduler(p, policy, param, false);
5080 4949}
@@ -5590,7 +5459,7 @@ void sched_show_task(struct task_struct *p)
5590 5459 unsigned state;
5591 5460
5592 5461 state = p->state ? __ffs(p->state) + 1 : 0;
5593 printk(KERN_INFO "%-13.13s %c", p->comm,
5462 printk(KERN_INFO "%-15.15s %c", p->comm,
5594 5463 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
5595 5464#if BITS_PER_LONG == 32
5596 5465 if (state == TASK_RUNNING)
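
The %-13.13s to %-15.15s change widens the comm column so a full 15-character task name is no longer cut short. The width.precision pair both pads and truncates, as this userspace check shows:

    #include <stdio.h>

    int main(void)
    {
            /* A 15-character name: the old "%-13.13s" truncates it to
             * "kworker/u16:13", the new "%-15.15s" prints it whole. */
            printf("[%-13.13s]\n", "kworker/u16:13H");
            printf("[%-15.15s]\n", "kworker/u16:13H");
            return 0;
    }
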
@@ -5754,7 +5623,6 @@ static void update_sysctl(void)
5754 5623 SET_SYSCTL(sched_min_granularity);
5755 5624 SET_SYSCTL(sched_latency);
5756 5625 SET_SYSCTL(sched_wakeup_granularity);
5757 SET_SYSCTL(sched_shares_ratelimit);
5758 5626#undef SET_SYSCTL
5759 5627}
5760 5628
@@ -5830,7 +5698,7 @@ again:
5830 5698 goto out;
5831 5699
5832 5700 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
5833 if (migrate_task(p, dest_cpu)) {
5701 if (migrate_task(p, rq)) {
5834 5702 struct migration_arg arg = { p, dest_cpu };
5835 5703 /* Need help from migration thread: drop lock and wait. */
5836 5704 task_rq_unlock(rq, &flags);
@@ -5912,29 +5780,20 @@ static int migration_cpu_stop(void *data)
5912 5780}
5913 5781
5914 5782#ifdef CONFIG_HOTPLUG_CPU
5783
5915 5784/*
5916 * Figure out where task on dead CPU should go, use force if necessary.
5785 * Ensures that the idle task is using init_mm right before its cpu goes
5786 * offline.
5917 5787 */
5918void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5788void idle_task_exit(void)
5919 5789{
5920 struct rq *rq = cpu_rq(dead_cpu);
5921 int needs_cpu, uninitialized_var(dest_cpu);
5922 unsigned long flags;
5790 struct mm_struct *mm = current->active_mm;
5923 5791
5924 local_irq_save(flags);
5792 BUG_ON(cpu_online(smp_processor_id()));
5925 5793
5926 raw_spin_lock(&rq->lock);
5927 needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
5928 if (needs_cpu)
5929 dest_cpu = select_fallback_rq(dead_cpu, p);
5930 raw_spin_unlock(&rq->lock);
5931 /*
5932 * It can only fail if we race with set_cpus_allowed(),
5933 * in which case the racer should migrate the task anyway.
5934 */
5935 if (needs_cpu)
5936 __migrate_task(p, dead_cpu, dest_cpu);
5937 local_irq_restore(flags);
5794 if (mm != &init_mm)
5795 switch_mm(mm, &init_mm, current);
5796 mmdrop(mm);
5938 5797}
5939 5798
5940 5799/*
@@ -5947,128 +5806,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5947 5806static void migrate_nr_uninterruptible(struct rq *rq_src)
5948 5807{
5949 5808 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
5950 unsigned long flags;
5951 5809
5952 local_irq_save(flags);
5953 double_rq_lock(rq_src, rq_dest);
5954 5810 rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
5955 5811 rq_src->nr_uninterruptible = 0;
5956 double_rq_unlock(rq_src, rq_dest);
5957 local_irq_restore(flags);
5958}
5959
5960/* Run through task list and migrate tasks from the dead cpu. */
5961static void migrate_live_tasks(int src_cpu)
5962{
5963 struct task_struct *p, *t;
5964
5965 read_lock(&tasklist_lock);
5966
5967 do_each_thread(t, p) {
5968 if (p == current)
5969 continue;
5970
5971 if (task_cpu(p) == src_cpu)
5972 move_task_off_dead_cpu(src_cpu, p);
5973 } while_each_thread(t, p);
5974
5975 read_unlock(&tasklist_lock);
5976} 5812}
5977 5813
5978/* 5814/*
5979 * Schedules idle task to be the next runnable task on current CPU. 5815 * remove the tasks which were accounted by rq from calc_load_tasks.
5980 * It does so by boosting its priority to highest possible.
5981 * Used by CPU offline code.
5982 */ 5816 */
5983void sched_idle_next(void) 5817static void calc_global_load_remove(struct rq *rq)
5984{ 5818{
5985 int this_cpu = smp_processor_id(); 5819 atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
5986 struct rq *rq = cpu_rq(this_cpu); 5820 rq->calc_load_active = 0;
5987 struct task_struct *p = rq->idle;
5988 unsigned long flags;
5989
5990 /* cpu has to be offline */
5991 BUG_ON(cpu_online(this_cpu));
5992
5993 /*
5994 * Strictly not necessary since rest of the CPUs are stopped by now
5995 * and interrupts disabled on the current cpu.
5996 */
5997 raw_spin_lock_irqsave(&rq->lock, flags);
5998
5999 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
6000
6001 activate_task(rq, p, 0);
6002
6003 raw_spin_unlock_irqrestore(&rq->lock, flags);
6004} 5821}
6005 5822
6006/* 5823/*
6007 * Ensures that the idle task is using init_mm right before its cpu goes 5824 * Migrate all tasks from the rq, sleeping tasks will be migrated by
6008 * offline. 5825 * try_to_wake_up()->select_task_rq().
5826 *
5827	 * Called with rq->lock held even though we're in stop_machine() and
5828 * there's no concurrency possible, we hold the required locks anyway
5829 * because of lock validation efforts.
6009 */ 5830 */
6010void idle_task_exit(void) 5831static void migrate_tasks(unsigned int dead_cpu)
6011{
6012 struct mm_struct *mm = current->active_mm;
6013
6014 BUG_ON(cpu_online(smp_processor_id()));
6015
6016 if (mm != &init_mm)
6017 switch_mm(mm, &init_mm, current);
6018 mmdrop(mm);
6019}
6020
6021/* called under rq->lock with disabled interrupts */
6022static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
6023{ 5832{
6024 struct rq *rq = cpu_rq(dead_cpu); 5833 struct rq *rq = cpu_rq(dead_cpu);
6025 5834 struct task_struct *next, *stop = rq->stop;
6026 /* Must be exiting, otherwise would be on tasklist. */ 5835 int dest_cpu;
6027 BUG_ON(!p->exit_state);
6028
6029 /* Cannot have done final schedule yet: would have vanished. */
6030 BUG_ON(p->state == TASK_DEAD);
6031
6032 get_task_struct(p);
6033 5836
6034 /* 5837 /*
6035 * Drop lock around migration; if someone else moves it, 5838 * Fudge the rq selection such that the below task selection loop
6036 * that's OK. No task can be added to this CPU, so iteration is 5839 * doesn't get stuck on the currently eligible stop task.
6037 * fine. 5840 *
5841 * We're currently inside stop_machine() and the rq is either stuck
5842 * in the stop_machine_cpu_stop() loop, or we're executing this code,
5843 * either way we should never end up calling schedule() until we're
5844 * done here.
6038 */ 5845 */
6039 raw_spin_unlock_irq(&rq->lock); 5846 rq->stop = NULL;
6040 move_task_off_dead_cpu(dead_cpu, p);
6041 raw_spin_lock_irq(&rq->lock);
6042
6043 put_task_struct(p);
6044}
6045
6046/* release_task() removes task from tasklist, so we won't find dead tasks. */
6047static void migrate_dead_tasks(unsigned int dead_cpu)
6048{
6049 struct rq *rq = cpu_rq(dead_cpu);
6050 struct task_struct *next;
6051 5847
6052 for ( ; ; ) { 5848 for ( ; ; ) {
6053 if (!rq->nr_running) 5849 /*
5850 * There's this thread running, bail when that's the only
5851 * remaining thread.
5852 */
5853 if (rq->nr_running == 1)
6054 break; 5854 break;
5855
6055 next = pick_next_task(rq); 5856 next = pick_next_task(rq);
6056 if (!next) 5857 BUG_ON(!next);
6057 break;
6058 next->sched_class->put_prev_task(rq, next); 5858 next->sched_class->put_prev_task(rq, next);
6059 migrate_dead(dead_cpu, next);
6060 5859
5860 /* Find suitable destination for @next, with force if needed. */
5861 dest_cpu = select_fallback_rq(dead_cpu, next);
5862 raw_spin_unlock(&rq->lock);
5863
5864 __migrate_task(next, dead_cpu, dest_cpu);
5865
5866 raw_spin_lock(&rq->lock);
6061 } 5867 }
6062}
6063 5868
6064/* 5869 rq->stop = stop;
6065 * remove the tasks which were accounted by rq from calc_load_tasks.
6066 */
6067static void calc_global_load_remove(struct rq *rq)
6068{
6069 atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
6070 rq->calc_load_active = 0;
6071} 5870}
5871
6072#endif /* CONFIG_HOTPLUG_CPU */ 5872#endif /* CONFIG_HOTPLUG_CPU */
6073 5873
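
The new migrate_tasks() above replaces the old migrate_live_tasks()/migrate_dead_tasks() pair: with rq->lock held and rq->stop parked out of the way, it keeps picking the next runnable task and pushing it to a fallback cpu until only the current thread remains. A minimal standalone model of that drain loop follows; struct rq, pick_next_task() and the destination choice here are simplified stand-ins, not the kernel API.

#include <stdio.h>

#define NR_TASKS 4

struct rq {
        int nr_running;
        int tasks[NR_TASKS];
};

static int pick_next_task(struct rq *rq)
{
        return rq->tasks[rq->nr_running - 1];   /* simplified pick */
}

static void migrate_one(struct rq *rq, int task, int dest_cpu)
{
        rq->nr_running--;                       /* task leaves this rq */
        printf("task %d -> cpu %d\n", task, dest_cpu);
}

static void migrate_tasks_model(struct rq *rq, int dead_cpu)
{
        /* tasks[0] models the current (stop) thread: never migrated */
        while (rq->nr_running > 1)
                migrate_one(rq, pick_next_task(rq), dead_cpu + 1);
}

int main(void)
{
        struct rq rq = { .nr_running = NR_TASKS, .tasks = { 0, 1, 2, 3 } };

        migrate_tasks_model(&rq, 0);
        return 0;
}
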
6074#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) 5874#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -6278,15 +6078,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6278 unsigned long flags; 6078 unsigned long flags;
6279 struct rq *rq = cpu_rq(cpu); 6079 struct rq *rq = cpu_rq(cpu);
6280 6080
6281 switch (action) { 6081 switch (action & ~CPU_TASKS_FROZEN) {
6282 6082
6283 case CPU_UP_PREPARE: 6083 case CPU_UP_PREPARE:
6284 case CPU_UP_PREPARE_FROZEN:
6285 rq->calc_load_update = calc_load_update; 6084 rq->calc_load_update = calc_load_update;
6286 break; 6085 break;
6287 6086
6288 case CPU_ONLINE: 6087 case CPU_ONLINE:
6289 case CPU_ONLINE_FROZEN:
6290 /* Update our root-domain */ 6088 /* Update our root-domain */
6291 raw_spin_lock_irqsave(&rq->lock, flags); 6089 raw_spin_lock_irqsave(&rq->lock, flags);
6292 if (rq->rd) { 6090 if (rq->rd) {
@@ -6298,30 +6096,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6298 break; 6096 break;
6299 6097
6300#ifdef CONFIG_HOTPLUG_CPU 6098#ifdef CONFIG_HOTPLUG_CPU
6301 case CPU_DEAD:
6302 case CPU_DEAD_FROZEN:
6303 migrate_live_tasks(cpu);
6304 /* Idle task back to normal (off runqueue, low prio) */
6305 raw_spin_lock_irq(&rq->lock);
6306 deactivate_task(rq, rq->idle, 0);
6307 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
6308 rq->idle->sched_class = &idle_sched_class;
6309 migrate_dead_tasks(cpu);
6310 raw_spin_unlock_irq(&rq->lock);
6311 migrate_nr_uninterruptible(rq);
6312 BUG_ON(rq->nr_running != 0);
6313 calc_global_load_remove(rq);
6314 break;
6315
6316 case CPU_DYING: 6099 case CPU_DYING:
6317 case CPU_DYING_FROZEN:
6318 /* Update our root-domain */ 6100 /* Update our root-domain */
6319 raw_spin_lock_irqsave(&rq->lock, flags); 6101 raw_spin_lock_irqsave(&rq->lock, flags);
6320 if (rq->rd) { 6102 if (rq->rd) {
6321 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 6103 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6322 set_rq_offline(rq); 6104 set_rq_offline(rq);
6323 } 6105 }
6106 migrate_tasks(cpu);
6107 BUG_ON(rq->nr_running != 1); /* the migration thread */
6324 raw_spin_unlock_irqrestore(&rq->lock, flags); 6108 raw_spin_unlock_irqrestore(&rq->lock, flags);
6109
6110 migrate_nr_uninterruptible(rq);
6111 calc_global_load_remove(rq);
6325 break; 6112 break;
6326#endif 6113#endif
6327 } 6114 }
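
A small point in the hunk above: masking the notifier action with ~CPU_TASKS_FROZEN lets one case label serve both the normal and the _FROZEN (suspend/resume) variants, which is why the CPU_UP_PREPARE_FROZEN and CPU_ONLINE_FROZEN labels can be dropped. A self-contained sketch of the trick; the constant values here are illustrative, the real ones live in linux/cpu.h.

#include <stdio.h>

#define CPU_TASKS_FROZEN  0x10           /* illustrative value only */
#define CPU_ONLINE        0x2
#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN)

static const char *decode(unsigned long action)
{
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
                return "online";         /* matches both variants */
        default:
                return "other";
        }
}

int main(void)
{
        printf("%s %s\n", decode(CPU_ONLINE), decode(CPU_ONLINE_FROZEN));
        return 0;
}
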
@@ -8052,15 +7839,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
8052 7839
8053#ifdef CONFIG_FAIR_GROUP_SCHED 7840#ifdef CONFIG_FAIR_GROUP_SCHED
8054static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, 7841static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
8055 struct sched_entity *se, int cpu, int add, 7842 struct sched_entity *se, int cpu,
8056 struct sched_entity *parent) 7843 struct sched_entity *parent)
8057{ 7844{
8058 struct rq *rq = cpu_rq(cpu); 7845 struct rq *rq = cpu_rq(cpu);
8059 tg->cfs_rq[cpu] = cfs_rq; 7846 tg->cfs_rq[cpu] = cfs_rq;
8060 init_cfs_rq(cfs_rq, rq); 7847 init_cfs_rq(cfs_rq, rq);
8061 cfs_rq->tg = tg; 7848 cfs_rq->tg = tg;
8062 if (add)
8063 list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
8064 7849
8065 tg->se[cpu] = se; 7850 tg->se[cpu] = se;
8066 /* se could be NULL for init_task_group */ 7851 /* se could be NULL for init_task_group */
@@ -8073,15 +7858,14 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
8073 se->cfs_rq = parent->my_q; 7858 se->cfs_rq = parent->my_q;
8074 7859
8075 se->my_q = cfs_rq; 7860 se->my_q = cfs_rq;
8076 se->load.weight = tg->shares; 7861 update_load_set(&se->load, 0);
8077 se->load.inv_weight = 0;
8078 se->parent = parent; 7862 se->parent = parent;
8079} 7863}
8080#endif 7864#endif
8081 7865
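
update_load_set() above replaces the open-coded weight/inv_weight stores that init_tg_cfs_entry() used to perform, and now seeds group entities with weight 0 so update_cfs_shares() can grow them on demand. A plausible shape for the helper, reconstructed from the two assignments it replaces (the struct layout below only mirrors the kernel's):

struct load_weight {
        unsigned long weight;
        unsigned long inv_weight;       /* cached fixed-point 1/weight */
};

static inline void update_load_set(struct load_weight *lw, unsigned long w)
{
        lw->weight = w;
        lw->inv_weight = 0;             /* invalidate the cached inverse */
}
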
8082#ifdef CONFIG_RT_GROUP_SCHED 7866#ifdef CONFIG_RT_GROUP_SCHED
8083static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, 7867static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
8084 struct sched_rt_entity *rt_se, int cpu, int add, 7868 struct sched_rt_entity *rt_se, int cpu,
8085 struct sched_rt_entity *parent) 7869 struct sched_rt_entity *parent)
8086{ 7870{
8087 struct rq *rq = cpu_rq(cpu); 7871 struct rq *rq = cpu_rq(cpu);
@@ -8090,8 +7874,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
8090 init_rt_rq(rt_rq, rq); 7874 init_rt_rq(rt_rq, rq);
8091 rt_rq->tg = tg; 7875 rt_rq->tg = tg;
8092 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; 7876 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
8093 if (add)
8094 list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
8095 7877
8096 tg->rt_se[cpu] = rt_se; 7878 tg->rt_se[cpu] = rt_se;
8097 if (!rt_se) 7879 if (!rt_se)
@@ -8164,13 +7946,9 @@ void __init sched_init(void)
8164#ifdef CONFIG_CGROUP_SCHED 7946#ifdef CONFIG_CGROUP_SCHED
8165 list_add(&init_task_group.list, &task_groups); 7947 list_add(&init_task_group.list, &task_groups);
8166 INIT_LIST_HEAD(&init_task_group.children); 7948 INIT_LIST_HEAD(&init_task_group.children);
8167 7949 autogroup_init(&init_task);
8168#endif /* CONFIG_CGROUP_SCHED */ 7950#endif /* CONFIG_CGROUP_SCHED */
8169 7951
8170#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
8171 update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
8172 __alignof__(unsigned long));
8173#endif
8174 for_each_possible_cpu(i) { 7952 for_each_possible_cpu(i) {
8175 struct rq *rq; 7953 struct rq *rq;
8176 7954
@@ -8184,7 +7962,6 @@ void __init sched_init(void)
8184#ifdef CONFIG_FAIR_GROUP_SCHED 7962#ifdef CONFIG_FAIR_GROUP_SCHED
8185 init_task_group.shares = init_task_group_load; 7963 init_task_group.shares = init_task_group_load;
8186 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); 7964 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
8187#ifdef CONFIG_CGROUP_SCHED
8188 /* 7965 /*
8189 * How much cpu bandwidth does init_task_group get? 7966 * How much cpu bandwidth does init_task_group get?
8190 * 7967 *
@@ -8204,16 +7981,13 @@ void __init sched_init(void)
8204 * We achieve this by letting init_task_group's tasks sit 7981 * We achieve this by letting init_task_group's tasks sit
8205 * directly in rq->cfs (i.e init_task_group->se[] = NULL). 7982 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
8206 */ 7983 */
8207 init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL); 7984 init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL);
8208#endif
8209#endif /* CONFIG_FAIR_GROUP_SCHED */ 7985#endif /* CONFIG_FAIR_GROUP_SCHED */
8210 7986
8211 rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; 7987 rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
8212#ifdef CONFIG_RT_GROUP_SCHED 7988#ifdef CONFIG_RT_GROUP_SCHED
8213 INIT_LIST_HEAD(&rq->leaf_rt_rq_list); 7989 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
8214#ifdef CONFIG_CGROUP_SCHED 7990 init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL);
8215 init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
8216#endif
8217#endif 7991#endif
8218 7992
8219 for (j = 0; j < CPU_LOAD_IDX_MAX; j++) 7993 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -8293,8 +8067,6 @@ void __init sched_init(void)
8293 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); 8067 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
8294#endif /* SMP */ 8068#endif /* SMP */
8295 8069
8296 perf_event_init();
8297
8298 scheduler_running = 1; 8070 scheduler_running = 1;
8299} 8071}
8300 8072
@@ -8488,7 +8260,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8488 if (!se) 8260 if (!se)
8489 goto err_free_rq; 8261 goto err_free_rq;
8490 8262
8491 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); 8263 init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
8492 } 8264 }
8493 8265
8494 return 1; 8266 return 1;
@@ -8499,15 +8271,21 @@ err:
8499 return 0; 8271 return 0;
8500} 8272}
8501 8273
8502static inline void register_fair_sched_group(struct task_group *tg, int cpu)
8503{
8504 list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
8505 &cpu_rq(cpu)->leaf_cfs_rq_list);
8506}
8507
8508static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) 8274static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
8509{ 8275{
8510 list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); 8276 struct rq *rq = cpu_rq(cpu);
8277 unsigned long flags;
8278
8279 /*
8280 * Only empty task groups can be destroyed; so we can speculatively
8281 * check on_list without danger of it being re-added.
8282 */
8283 if (!tg->cfs_rq[cpu]->on_list)
8284 return;
8285
8286 raw_spin_lock_irqsave(&rq->lock, flags);
8287 list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
8288 raw_spin_unlock_irqrestore(&rq->lock, flags);
8511} 8289}
8512#else /* !CONFIG_FAIR_GROUP_SCHED */ 8290
8513static inline void free_fair_sched_group(struct task_group *tg) 8291static inline void free_fair_sched_group(struct task_group *tg)
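
The rewritten unregister_fair_sched_group() above is a neat lock-avoidance pattern: only empty task groups are ever destroyed, so on_list can no longer be set concurrently, the unlocked early-out is safe, and rq->lock is taken only when there is really something to unlink. A standalone model of the pattern, with a pthread mutex standing in for rq->lock:

#include <pthread.h>
#include <stdbool.h>

struct group {
        bool on_list;
};

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

static void unregister_group(struct group *g)
{
        /* speculative check: cannot be re-set once the group is dying */
        if (!g->on_list)
                return;

        pthread_mutex_lock(&rq_lock);
        g->on_list = false;             /* list_del_leaf_cfs_rq() stand-in */
        pthread_mutex_unlock(&rq_lock);
}

int main(void)
{
        struct group g = { .on_list = true };

        unregister_group(&g);
        return g.on_list;               /* 0 on success */
}
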
@@ -8520,10 +8298,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8520 return 1; 8298 return 1;
8521} 8299}
8522 8300
8523static inline void register_fair_sched_group(struct task_group *tg, int cpu)
8524{
8525}
8526
8527static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) 8301static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
8528{ 8302{
8529} 8303}
@@ -8578,7 +8352,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
8578 if (!rt_se) 8352 if (!rt_se)
8579 goto err_free_rq; 8353 goto err_free_rq;
8580 8354
8581 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); 8355 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
8582 } 8356 }
8583 8357
8584 return 1; 8358 return 1;
@@ -8588,17 +8362,6 @@ err_free_rq:
8588err: 8362err:
8589 return 0; 8363 return 0;
8590} 8364}
8591
8592static inline void register_rt_sched_group(struct task_group *tg, int cpu)
8593{
8594 list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
8595 &cpu_rq(cpu)->leaf_rt_rq_list);
8596}
8597
8598static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
8599{
8600 list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
8601}
8602#else /* !CONFIG_RT_GROUP_SCHED */ 8365#else /* !CONFIG_RT_GROUP_SCHED */
8603static inline void free_rt_sched_group(struct task_group *tg) 8366static inline void free_rt_sched_group(struct task_group *tg)
8604{ 8367{
@@ -8609,14 +8372,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
8609{ 8372{
8610 return 1; 8373 return 1;
8611} 8374}
8612
8613static inline void register_rt_sched_group(struct task_group *tg, int cpu)
8614{
8615}
8616
8617static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
8618{
8619}
8620#endif /* CONFIG_RT_GROUP_SCHED */ 8375#endif /* CONFIG_RT_GROUP_SCHED */
8621 8376
8622#ifdef CONFIG_CGROUP_SCHED 8377#ifdef CONFIG_CGROUP_SCHED
@@ -8632,7 +8387,6 @@ struct task_group *sched_create_group(struct task_group *parent)
8632{ 8387{
8633 struct task_group *tg; 8388 struct task_group *tg;
8634 unsigned long flags; 8389 unsigned long flags;
8635 int i;
8636 8390
8637 tg = kzalloc(sizeof(*tg), GFP_KERNEL); 8391 tg = kzalloc(sizeof(*tg), GFP_KERNEL);
8638 if (!tg) 8392 if (!tg)
@@ -8645,10 +8399,6 @@ struct task_group *sched_create_group(struct task_group *parent)
8645 goto err; 8399 goto err;
8646 8400
8647 spin_lock_irqsave(&task_group_lock, flags); 8401 spin_lock_irqsave(&task_group_lock, flags);
8648 for_each_possible_cpu(i) {
8649 register_fair_sched_group(tg, i);
8650 register_rt_sched_group(tg, i);
8651 }
8652 list_add_rcu(&tg->list, &task_groups); 8402 list_add_rcu(&tg->list, &task_groups);
8653 8403
8654 WARN_ON(!parent); /* root should already exist */ 8404 WARN_ON(!parent); /* root should already exist */
@@ -8678,11 +8428,11 @@ void sched_destroy_group(struct task_group *tg)
8678 unsigned long flags; 8428 unsigned long flags;
8679 int i; 8429 int i;
8680 8430
8681 spin_lock_irqsave(&task_group_lock, flags); 8431 /* end participation in shares distribution */
8682 for_each_possible_cpu(i) { 8432 for_each_possible_cpu(i)
8683 unregister_fair_sched_group(tg, i); 8433 unregister_fair_sched_group(tg, i);
8684 unregister_rt_sched_group(tg, i); 8434
8685 } 8435 spin_lock_irqsave(&task_group_lock, flags);
8686 list_del_rcu(&tg->list); 8436 list_del_rcu(&tg->list);
8687 list_del_rcu(&tg->siblings); 8437 list_del_rcu(&tg->siblings);
8688 spin_unlock_irqrestore(&task_group_lock, flags); 8438 spin_unlock_irqrestore(&task_group_lock, flags);
@@ -8729,33 +8479,6 @@ void sched_move_task(struct task_struct *tsk)
8729#endif /* CONFIG_CGROUP_SCHED */ 8479#endif /* CONFIG_CGROUP_SCHED */
8730 8480
8731#ifdef CONFIG_FAIR_GROUP_SCHED 8481#ifdef CONFIG_FAIR_GROUP_SCHED
8732static void __set_se_shares(struct sched_entity *se, unsigned long shares)
8733{
8734 struct cfs_rq *cfs_rq = se->cfs_rq;
8735 int on_rq;
8736
8737 on_rq = se->on_rq;
8738 if (on_rq)
8739 dequeue_entity(cfs_rq, se, 0);
8740
8741 se->load.weight = shares;
8742 se->load.inv_weight = 0;
8743
8744 if (on_rq)
8745 enqueue_entity(cfs_rq, se, 0);
8746}
8747
8748static void set_se_shares(struct sched_entity *se, unsigned long shares)
8749{
8750 struct cfs_rq *cfs_rq = se->cfs_rq;
8751 struct rq *rq = cfs_rq->rq;
8752 unsigned long flags;
8753
8754 raw_spin_lock_irqsave(&rq->lock, flags);
8755 __set_se_shares(se, shares);
8756 raw_spin_unlock_irqrestore(&rq->lock, flags);
8757}
8758
8759static DEFINE_MUTEX(shares_mutex); 8482static DEFINE_MUTEX(shares_mutex);
8760 8483
8761int sched_group_set_shares(struct task_group *tg, unsigned long shares) 8484int sched_group_set_shares(struct task_group *tg, unsigned long shares)
@@ -8778,37 +8501,19 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8778 if (tg->shares == shares) 8501 if (tg->shares == shares)
8779 goto done; 8502 goto done;
8780 8503
8781 spin_lock_irqsave(&task_group_lock, flags);
8782 for_each_possible_cpu(i)
8783 unregister_fair_sched_group(tg, i);
8784 list_del_rcu(&tg->siblings);
8785 spin_unlock_irqrestore(&task_group_lock, flags);
8786
8787 /* wait for any ongoing reference to this group to finish */
8788 synchronize_sched();
8789
8790 /*
8791 * Now we are free to modify the group's share on each cpu
8792 * w/o tripping rebalance_share or load_balance_fair.
8793 */
8794 tg->shares = shares; 8504 tg->shares = shares;
8795 for_each_possible_cpu(i) { 8505 for_each_possible_cpu(i) {
8796 /* 8506 struct rq *rq = cpu_rq(i);
8797 * force a rebalance 8507 struct sched_entity *se;
8798 */ 8508
8799 cfs_rq_set_shares(tg->cfs_rq[i], 0); 8509 se = tg->se[i];
8800 set_se_shares(tg->se[i], shares); 8510 /* Propagate contribution to hierarchy */
8511 raw_spin_lock_irqsave(&rq->lock, flags);
8512 for_each_sched_entity(se)
8513 update_cfs_shares(group_cfs_rq(se), 0);
8514 raw_spin_unlock_irqrestore(&rq->lock, flags);
8801 } 8515 }
8802 8516
8803 /*
8804 * Enable load balance activity on this group, by inserting it back on
8805 * each cpu's rq->leaf_cfs_rq_list.
8806 */
8807 spin_lock_irqsave(&task_group_lock, flags);
8808 for_each_possible_cpu(i)
8809 register_fair_sched_group(tg, i);
8810 list_add_rcu(&tg->siblings, &tg->parent->children);
8811 spin_unlock_irqrestore(&task_group_lock, flags);
8812done: 8517done:
8813 mutex_unlock(&shares_mutex); 8518 mutex_unlock(&shares_mutex);
8814 return 0; 8519 return 0;
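
Note the shape of the new sched_group_set_shares(): tg->shares is written once under shares_mutex, then each cpu's entity hierarchy is updated strictly under that cpu's rq->lock, so the old unregister/synchronize_sched()/re-register dance is no longer needed. The propagation walk is just a parent-pointer loop; a sketch with stand-in types:

struct sched_entity {
        struct sched_entity *parent;
};

static void update_shares_one(struct sched_entity *se)
{
        (void)se;                       /* update_cfs_shares() stand-in */
}

static void propagate(struct sched_entity *se)
{
        for (; se; se = se->parent)     /* for_each_sched_entity() */
                update_shares_one(se);
}
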
@@ -9534,72 +9239,3 @@ struct cgroup_subsys cpuacct_subsys = {
9534}; 9239};
9535#endif /* CONFIG_CGROUP_CPUACCT */ 9240#endif /* CONFIG_CGROUP_CPUACCT */
9536 9241
9537#ifndef CONFIG_SMP
9538
9539void synchronize_sched_expedited(void)
9540{
9541 barrier();
9542}
9543EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
9544
9545#else /* #ifndef CONFIG_SMP */
9546
9547static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
9548
9549static int synchronize_sched_expedited_cpu_stop(void *data)
9550{
9551 /*
9552 * There must be a full memory barrier on each affected CPU
9553 * between the time that try_stop_cpus() is called and the
9554 * time that it returns.
9555 *
9556 * In the current initial implementation of cpu_stop, the
9557 * above condition is already met when the control reaches
9558 * this point and the following smp_mb() is not strictly
9559 * necessary. Do smp_mb() anyway for documentation and
9560 * robustness against future implementation changes.
9561 */
9562 smp_mb(); /* See above comment block. */
9563 return 0;
9564}
9565
9566/*
9567 * Wait for an rcu-sched grace period to elapse, but use "big hammer"
9568 * approach to force grace period to end quickly. This consumes
9569 * significant time on all CPUs, and is thus not recommended for
9570 * any sort of common-case code.
9571 *
9572 * Note that it is illegal to call this function while holding any
9573 * lock that is acquired by a CPU-hotplug notifier. Failing to
9574 * observe this restriction will result in deadlock.
9575 */
9576void synchronize_sched_expedited(void)
9577{
9578 int snap, trycount = 0;
9579
9580 smp_mb(); /* ensure prior mod happens before capturing snap. */
9581 snap = atomic_read(&synchronize_sched_expedited_count) + 1;
9582 get_online_cpus();
9583 while (try_stop_cpus(cpu_online_mask,
9584 synchronize_sched_expedited_cpu_stop,
9585 NULL) == -EAGAIN) {
9586 put_online_cpus();
9587 if (trycount++ < 10)
9588 udelay(trycount * num_online_cpus());
9589 else {
9590 synchronize_sched();
9591 return;
9592 }
9593 if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
9594 smp_mb(); /* ensure test happens before caller kfree */
9595 return;
9596 }
9597 get_online_cpus();
9598 }
9599 atomic_inc(&synchronize_sched_expedited_count);
9600 smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
9601 put_online_cpus();
9602}
9603EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
9604
9605#endif /* #else #ifndef CONFIG_SMP */
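
The synchronize_sched_expedited() implementation deleted above (it moves out of sched.c in this series) is built around a snapshot-and-retry loop: capture the completion counter, try to stop all cpus, and on contention back off with growing delays, fall through to the ordinary grace period after ten attempts, or return early if someone else's expedited grace period already covered us. A standalone model of just that control flow, with the cpu-stop machinery stubbed to always succeed:

#include <stdio.h>

static int count;                        /* completed grace periods */

static int try_stop_cpus(void)
{
        return 0;                        /* stub: 0 means success */
}

static void expedited(void)
{
        int snap = count + 1, trycount = 0;

        while (try_stop_cpus() != 0) {
                if (trycount++ >= 10)
                        return;          /* fall back to the slow path */
                if (count - snap >= 0)
                        return;          /* someone did our work for us */
        }
        count++;                         /* our grace period completed */
}

int main(void)
{
        expedited();
        printf("count=%d\n", count);
        return 0;
}
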
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
new file mode 100644
index 000000000000..c80fedcd476b
--- /dev/null
+++ b/kernel/sched_autogroup.c
@@ -0,0 +1,238 @@
1#ifdef CONFIG_SCHED_AUTOGROUP
2
3#include <linux/proc_fs.h>
4#include <linux/seq_file.h>
5#include <linux/kallsyms.h>
6#include <linux/utsname.h>
7
8unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
9static struct autogroup autogroup_default;
10static atomic_t autogroup_seq_nr;
11
12static void autogroup_init(struct task_struct *init_task)
13{
14 autogroup_default.tg = &init_task_group;
15 init_task_group.autogroup = &autogroup_default;
16 kref_init(&autogroup_default.kref);
17 init_rwsem(&autogroup_default.lock);
18 init_task->signal->autogroup = &autogroup_default;
19}
20
21static inline void autogroup_free(struct task_group *tg)
22{
23 kfree(tg->autogroup);
24}
25
26static inline void autogroup_destroy(struct kref *kref)
27{
28 struct autogroup *ag = container_of(kref, struct autogroup, kref);
29
30 sched_destroy_group(ag->tg);
31}
32
33static inline void autogroup_kref_put(struct autogroup *ag)
34{
35 kref_put(&ag->kref, autogroup_destroy);
36}
37
38static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
39{
40 kref_get(&ag->kref);
41 return ag;
42}
43
44static inline struct autogroup *autogroup_task_get(struct task_struct *p)
45{
46 struct autogroup *ag;
47 unsigned long flags;
48
49 if (!lock_task_sighand(p, &flags))
50 return autogroup_kref_get(&autogroup_default);
51
52 ag = autogroup_kref_get(p->signal->autogroup);
53 unlock_task_sighand(p, &flags);
54
55 return ag;
56}
57
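
autogroup lifetime above is plain kref counting: autogroup_default carries a base reference, each signal_struct pins its group, and autogroup_task_get() takes its reference under siglock so autogroup_move_group() cannot swap the pointer mid-read. A minimal userspace model of the get/put discipline, with kref reduced to a bare counter (the real destructor is sched_destroy_group() via autogroup_destroy()):

#include <stdio.h>
#include <stdlib.h>

struct ag {
        int refs;
};

static struct ag *ag_get(struct ag *a)
{
        a->refs++;
        return a;
}

static void ag_put(struct ag *a)
{
        if (--a->refs == 0) {            /* autogroup_destroy() stand-in */
                printf("destroying group\n");
                free(a);
        }
}

int main(void)
{
        struct ag *a = calloc(1, sizeof(*a));

        if (!a)
                return 1;
        a->refs = 1;                     /* creator's reference */
        ag_put(ag_get(a));               /* a temporary user comes and goes */
        ag_put(a);                       /* drop the last reference: frees */
        return 0;
}
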
58static inline struct autogroup *autogroup_create(void)
59{
60 struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
61 struct task_group *tg;
62
63 if (!ag)
64 goto out_fail;
65
66 tg = sched_create_group(&init_task_group);
67
68 if (IS_ERR(tg))
69 goto out_free;
70
71 kref_init(&ag->kref);
72 init_rwsem(&ag->lock);
73 ag->id = atomic_inc_return(&autogroup_seq_nr);
74 ag->tg = tg;
75 tg->autogroup = ag;
76
77 return ag;
78
79out_free:
80 kfree(ag);
81out_fail:
82 if (printk_ratelimit()) {
83 printk(KERN_WARNING "autogroup_create: %s failure.\n",
84 ag ? "sched_create_group()" : "kzalloc()");
85 }
86
87 return autogroup_kref_get(&autogroup_default);
88}
89
90static inline bool
91task_wants_autogroup(struct task_struct *p, struct task_group *tg)
92{
93 if (tg != &root_task_group)
94 return false;
95
96 if (p->sched_class != &fair_sched_class)
97 return false;
98
99 /*
100 * We can only assume the task group can't go away on us if
101 * autogroup_move_group() can see us on ->thread_group list.
102 */
103 if (p->flags & PF_EXITING)
104 return false;
105
106 return true;
107}
108
109static inline struct task_group *
110autogroup_task_group(struct task_struct *p, struct task_group *tg)
111{
112 int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
113
114 if (enabled && task_wants_autogroup(p, tg))
115 return p->signal->autogroup->tg;
116
117 return tg;
118}
119
120static void
121autogroup_move_group(struct task_struct *p, struct autogroup *ag)
122{
123 struct autogroup *prev;
124 struct task_struct *t;
125 unsigned long flags;
126
127 BUG_ON(!lock_task_sighand(p, &flags));
128
129 prev = p->signal->autogroup;
130 if (prev == ag) {
131 unlock_task_sighand(p, &flags);
132 return;
133 }
134
135 p->signal->autogroup = autogroup_kref_get(ag);
136
137 t = p;
138 do {
139 sched_move_task(t);
140 } while_each_thread(p, t);
141
142 unlock_task_sighand(p, &flags);
143 autogroup_kref_put(prev);
144}
145
146/* Allocates GFP_KERNEL, cannot be called under any spinlock */
147void sched_autogroup_create_attach(struct task_struct *p)
148{
149 struct autogroup *ag = autogroup_create();
150
151 autogroup_move_group(p, ag);
152 /* drop extra reference added by autogroup_create() */
153 autogroup_kref_put(ag);
154}
155EXPORT_SYMBOL(sched_autogroup_create_attach);
156
157/* Cannot be called under siglock. Currently has no users */
158void sched_autogroup_detach(struct task_struct *p)
159{
160 autogroup_move_group(p, &autogroup_default);
161}
162EXPORT_SYMBOL(sched_autogroup_detach);
163
164void sched_autogroup_fork(struct signal_struct *sig)
165{
166 sig->autogroup = autogroup_task_get(current);
167}
168
169void sched_autogroup_exit(struct signal_struct *sig)
170{
171 autogroup_kref_put(sig->autogroup);
172}
173
174static int __init setup_autogroup(char *str)
175{
176 sysctl_sched_autogroup_enabled = 0;
177
178 return 1;
179}
180
181__setup("noautogroup", setup_autogroup);
182
183#ifdef CONFIG_PROC_FS
184
185int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
186{
187 static unsigned long next = INITIAL_JIFFIES;
188 struct autogroup *ag;
189 int err;
190
191 if (*nice < -20 || *nice > 19)
192 return -EINVAL;
193
194 err = security_task_setnice(current, *nice);
195 if (err)
196 return err;
197
198 if (*nice < 0 && !can_nice(current, *nice))
199 return -EPERM;
200
201 /* this is a heavy operation taking global locks.. */
202 if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
203 return -EAGAIN;
204
205 next = HZ / 10 + jiffies;
206 ag = autogroup_task_get(p);
207
208 down_write(&ag->lock);
209 err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
210 if (!err)
211 ag->nice = *nice;
212 up_write(&ag->lock);
213
214 autogroup_kref_put(ag);
215
216 return err;
217}
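
Two details in proc_sched_autogroup_set_nice() above deserve a note: the nice value indexes straight into the scheduler's prio_to_weight[] table (nice + 20, since nice spans -20..19) to become the group's shares, and unprivileged writers are throttled to one update per HZ/10 using a jiffies timestamp. A standalone model of that wraparound-safe ratelimit, with jiffies simulated:

#include <stdio.h>

#define HZ 100
static unsigned long jiffies;            /* simulated tick counter */

/* wraparound-safe "a is before b", as in linux/jiffies.h */
#define time_before(a, b) ((long)((a) - (b)) < 0)

static int try_update(void)
{
        static unsigned long next;

        if (time_before(jiffies, next))
                return -1;               /* -EAGAIN */
        next = jiffies + HZ / 10;
        return 0;                        /* heavy update allowed */
}

int main(void)
{
        printf("%d %d", try_update(), try_update());  /* 0 -1 */
        jiffies += HZ;
        printf(" %d\n", try_update());                /* 0 again */
        return 0;
}
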
218
219void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
220{
221 struct autogroup *ag = autogroup_task_get(p);
222
223 down_read(&ag->lock);
224 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
225 up_read(&ag->lock);
226
227 autogroup_kref_put(ag);
228}
229#endif /* CONFIG_PROC_FS */
230
231#ifdef CONFIG_SCHED_DEBUG
232static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
233{
234 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
235}
236#endif /* CONFIG_SCHED_DEBUG */
237
238#endif /* CONFIG_SCHED_AUTOGROUP */
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
new file mode 100644
index 000000000000..5358e241cb20
--- /dev/null
+++ b/kernel/sched_autogroup.h
@@ -0,0 +1,32 @@
1#ifdef CONFIG_SCHED_AUTOGROUP
2
3struct autogroup {
4 struct kref kref;
5 struct task_group *tg;
6 struct rw_semaphore lock;
7 unsigned long id;
8 int nice;
9};
10
11static inline struct task_group *
12autogroup_task_group(struct task_struct *p, struct task_group *tg);
13
14#else /* !CONFIG_SCHED_AUTOGROUP */
15
16static inline void autogroup_init(struct task_struct *init_task) { }
17static inline void autogroup_free(struct task_group *tg) { }
18
19static inline struct task_group *
20autogroup_task_group(struct task_struct *p, struct task_group *tg)
21{
22 return tg;
23}
24
25#ifdef CONFIG_SCHED_DEBUG
26static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
27{
28 return 0;
29}
30#endif
31
32#endif /* CONFIG_SCHED_AUTOGROUP */
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 52f1a149bfb1..9d8af0b3fb64 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -79,7 +79,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
79} 79}
80EXPORT_SYMBOL_GPL(sched_clock); 80EXPORT_SYMBOL_GPL(sched_clock);
81 81
82static __read_mostly int sched_clock_running; 82__read_mostly int sched_clock_running;
83 83
84#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 84#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
85__read_mostly int sched_clock_stable; 85__read_mostly int sched_clock_stable;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 2e1b0d17dd9b..1dfae3d014b5 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec)
54#define SPLIT_NS(x) nsec_high(x), nsec_low(x) 54#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
55 55
56#ifdef CONFIG_FAIR_GROUP_SCHED 56#ifdef CONFIG_FAIR_GROUP_SCHED
57static void print_cfs_group_stats(struct seq_file *m, int cpu, 57static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
58 struct task_group *tg)
59{ 58{
60 struct sched_entity *se = tg->se[cpu]; 59 struct sched_entity *se = tg->se[cpu];
61 if (!se) 60 if (!se)
@@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
110 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); 109 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
111#endif 110#endif
112 111
113#ifdef CONFIG_CGROUP_SCHED
114 {
115 char path[64];
116
117 rcu_read_lock();
118 cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
119 rcu_read_unlock();
120 SEQ_printf(m, " %s", path);
121 }
122#endif
123 SEQ_printf(m, "\n"); 112 SEQ_printf(m, "\n");
124} 113}
125 114
@@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
147 read_unlock_irqrestore(&tasklist_lock, flags); 136 read_unlock_irqrestore(&tasklist_lock, flags);
148} 137}
149 138
150#if defined(CONFIG_CGROUP_SCHED) && \
151 (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
152static void task_group_path(struct task_group *tg, char *buf, int buflen)
153{
154 /* may be NULL if the underlying cgroup isn't fully-created yet */
155 if (!tg->css.cgroup) {
156 buf[0] = '\0';
157 return;
158 }
159 cgroup_path(tg->css.cgroup, buf, buflen);
160}
161#endif
162
163void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) 139void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
164{ 140{
165 s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, 141 s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
168 struct sched_entity *last; 144 struct sched_entity *last;
169 unsigned long flags; 145 unsigned long flags;
170 146
171#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
172 char path[128];
173 struct task_group *tg = cfs_rq->tg;
174
175 task_group_path(tg, path, sizeof(path));
176
177 SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
178#else
179 SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); 147 SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
180#endif
181 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", 148 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
182 SPLIT_NS(cfs_rq->exec_clock)); 149 SPLIT_NS(cfs_rq->exec_clock));
183 150
@@ -202,32 +169,29 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
202 spread0 = min_vruntime - rq0_min_vruntime; 169 spread0 = min_vruntime - rq0_min_vruntime;
203 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", 170 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
204 SPLIT_NS(spread0)); 171 SPLIT_NS(spread0));
205 SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
206 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
207
208 SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", 172 SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
209 cfs_rq->nr_spread_over); 173 cfs_rq->nr_spread_over);
174 SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
175 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
210#ifdef CONFIG_FAIR_GROUP_SCHED 176#ifdef CONFIG_FAIR_GROUP_SCHED
211#ifdef CONFIG_SMP 177#ifdef CONFIG_SMP
212 SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); 178 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg",
179 SPLIT_NS(cfs_rq->load_avg));
180 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period",
181 SPLIT_NS(cfs_rq->load_period));
182 SEQ_printf(m, " .%-30s: %ld\n", "load_contrib",
183 cfs_rq->load_contribution);
184 SEQ_printf(m, " .%-30s: %d\n", "load_tg",
185 atomic_read(&cfs_rq->tg->load_weight));
213#endif 186#endif
187
214 print_cfs_group_stats(m, cpu, cfs_rq->tg); 188 print_cfs_group_stats(m, cpu, cfs_rq->tg);
215#endif 189#endif
216} 190}
217 191
218void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) 192void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
219{ 193{
220#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
221 char path[128];
222 struct task_group *tg = rt_rq->tg;
223
224 task_group_path(tg, path, sizeof(path));
225
226 SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
227#else
228 SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); 194 SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
229#endif
230
231 195
232#define P(x) \ 196#define P(x) \
233 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) 197 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -243,6 +207,8 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
243#undef P 207#undef P
244} 208}
245 209
210extern __read_mostly int sched_clock_running;
211
246static void print_cpu(struct seq_file *m, int cpu) 212static void print_cpu(struct seq_file *m, int cpu)
247{ 213{
248 struct rq *rq = cpu_rq(cpu); 214 struct rq *rq = cpu_rq(cpu);
@@ -314,21 +280,42 @@ static const char *sched_tunable_scaling_names[] = {
314 280
315static int sched_debug_show(struct seq_file *m, void *v) 281static int sched_debug_show(struct seq_file *m, void *v)
316{ 282{
317 u64 now = ktime_to_ns(ktime_get()); 283 u64 ktime, sched_clk, cpu_clk;
284 unsigned long flags;
318 int cpu; 285 int cpu;
319 286
320 SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n", 287 local_irq_save(flags);
288 ktime = ktime_to_ns(ktime_get());
289 sched_clk = sched_clock();
290 cpu_clk = local_clock();
291 local_irq_restore(flags);
292
293 SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
321 init_utsname()->release, 294 init_utsname()->release,
322 (int)strcspn(init_utsname()->version, " "), 295 (int)strcspn(init_utsname()->version, " "),
323 init_utsname()->version); 296 init_utsname()->version);
324 297
325 SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); 298#define P(x) \
299 SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
300#define PN(x) \
301 SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
302 PN(ktime);
303 PN(sched_clk);
304 PN(cpu_clk);
305 P(jiffies);
306#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
307 P(sched_clock_stable);
308#endif
309#undef PN
310#undef P
311
312 SEQ_printf(m, "\n");
313 SEQ_printf(m, "sysctl_sched\n");
326 314
327#define P(x) \ 315#define P(x) \
328 SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) 316 SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x))
329#define PN(x) \ 317#define PN(x) \
330 SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) 318 SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
331 P(jiffies);
332 PN(sysctl_sched_latency); 319 PN(sysctl_sched_latency);
333 PN(sysctl_sched_min_granularity); 320 PN(sysctl_sched_min_granularity);
334 PN(sysctl_sched_wakeup_granularity); 321 PN(sysctl_sched_wakeup_granularity);
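
The P()/PN() macros above use the preprocessor's stringizing operator so every value prints under its own source name, with PN() splitting nanoseconds into the msec.usec form via SPLIT_NS(). A standalone illustration; SEQ_printf becomes plain printf and the split is simplified (the kernel's nsec_high/nsec_low round through do_div):

#include <stdio.h>

#define SPLIT_NS(ns) (long long)((ns) / 1000000), (long)((ns) % 1000000)

#define P(x)  printf("%-40s: %lld\n", #x, (long long)(x))
#define PN(x) printf("%-40s: %lld.%06ld\n", #x, SPLIT_NS(x))

int main(void)
{
        unsigned long long sched_clk = 123456789ULL;    /* fake sample */
        long jiffies = 100000L;

        PN(sched_clk);                   /* "sched_clk ...: 123.456789" */
        P(jiffies);
        return 0;
}
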
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 00ebd7686676..c62ebae65cf0 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
89 89
90const_debug unsigned int sysctl_sched_migration_cost = 500000UL; 90const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
91 91
92/*
93 * The exponential sliding window over which load is averaged for shares
94 * distribution.
95 * (default: 10msec)
96 */
97unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
98
92static const struct sched_class fair_sched_class; 99static const struct sched_class fair_sched_class;
93 100
94/************************************************************** 101/**************************************************************
@@ -143,6 +150,36 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
143 return cfs_rq->tg->cfs_rq[this_cpu]; 150 return cfs_rq->tg->cfs_rq[this_cpu];
144} 151}
145 152
153static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
154{
155 if (!cfs_rq->on_list) {
156 /*
157 * Ensure we either appear before our parent (if already
158 * enqueued) or force our parent to appear after us when it is
159 * enqueued. The fact that we always enqueue bottom-up
160 * reduces this to two cases.
161 */
162 if (cfs_rq->tg->parent &&
163 cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
164 list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
165 &rq_of(cfs_rq)->leaf_cfs_rq_list);
166 } else {
167 list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
168 &rq_of(cfs_rq)->leaf_cfs_rq_list);
169 }
170
171 cfs_rq->on_list = 1;
172 }
173}
174
175static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
176{
177 if (cfs_rq->on_list) {
178 list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
179 cfs_rq->on_list = 0;
180 }
181}
182
146/* Iterate through all leaf cfs_rq's on a runqueue */ 183
147#define for_each_leaf_cfs_rq(rq, cfs_rq) \ 184#define for_each_leaf_cfs_rq(rq, cfs_rq) \
148 list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) 185 list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
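
The ordering rule in list_add_leaf_cfs_rq() above, namely that a child cfs_rq must precede its parent on rq->leaf_cfs_rq_list, only needs the two branches shown because enqueue is strictly bottom-up: if the parent is already listed we insert at the head (and therefore before it); if not, we append at the tail and the parent lands behind us when it follows. A tiny standalone model of the invariant:

#include <stdio.h>

#define MAXG 8

static int list[MAXG], n;                /* index 0 is the list head */

static void add_head(int g)
{
        for (int i = n; i > 0; i--)
                list[i] = list[i - 1];
        list[0] = g;
        n++;
}

static void add_tail(int g)
{
        list[n++] = g;
}

static int on_list(int g)
{
        for (int i = 0; i < n; i++)
                if (list[i] == g)
                        return 1;
        return 0;
}

/* enqueue is bottom-up, so a child always enqueues before its parent */
static void add_leaf(int g, int parent)
{
        if (parent >= 0 && on_list(parent))
                add_head(g);             /* parent listed: go before it */
        else
                add_tail(g);             /* parent will be appended later */
}

int main(void)
{
        add_leaf(2, 1);
        add_leaf(1, 0);
        add_leaf(0, -1);                 /* root group */
        add_leaf(3, 1);                  /* parent already on the list */

        for (int i = 0; i < n; i++)
                printf("%d ", list[i]);  /* children precede parents */
        printf("\n");
        return 0;
}
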
@@ -246,6 +283,14 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
246 return &cpu_rq(this_cpu)->cfs; 283 return &cpu_rq(this_cpu)->cfs;
247} 284}
248 285
286static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
287{
288}
289
290static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
291{
292}
293
249#define for_each_leaf_cfs_rq(rq, cfs_rq) \ 294#define for_each_leaf_cfs_rq(rq, cfs_rq) \
250 for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) 295 for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
251 296
@@ -417,7 +462,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
417 WRT_SYSCTL(sched_min_granularity); 462 WRT_SYSCTL(sched_min_granularity);
418 WRT_SYSCTL(sched_latency); 463 WRT_SYSCTL(sched_latency);
419 WRT_SYSCTL(sched_wakeup_granularity); 464 WRT_SYSCTL(sched_wakeup_granularity);
420 WRT_SYSCTL(sched_shares_ratelimit);
421#undef WRT_SYSCTL 465#undef WRT_SYSCTL
422 466
423 return 0; 467 return 0;
@@ -495,6 +539,9 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
495 return calc_delta_fair(sched_slice(cfs_rq, se), se); 539 return calc_delta_fair(sched_slice(cfs_rq, se), se);
496} 540}
497 541
542static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
543static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta);
544
498/* 545/*
499 * Update the current task's runtime statistics. Skip current tasks that 546 * Update the current task's runtime statistics. Skip current tasks that
500 * are not in our scheduling class. 547 * are not in our scheduling class.
@@ -514,6 +561,10 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
514 561
515 curr->vruntime += delta_exec_weighted; 562 curr->vruntime += delta_exec_weighted;
516 update_min_vruntime(cfs_rq); 563 update_min_vruntime(cfs_rq);
564
565#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
566 cfs_rq->load_unacc_exec_time += delta_exec;
567#endif
517} 568}
518 569
519static void update_curr(struct cfs_rq *cfs_rq) 570static void update_curr(struct cfs_rq *cfs_rq)
@@ -633,7 +684,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
633 list_add(&se->group_node, &cfs_rq->tasks); 684 list_add(&se->group_node, &cfs_rq->tasks);
634 } 685 }
635 cfs_rq->nr_running++; 686 cfs_rq->nr_running++;
636 se->on_rq = 1;
637} 687}
638 688
639static void 689static void
@@ -647,9 +697,140 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
647 list_del_init(&se->group_node); 697 list_del_init(&se->group_node);
648 } 698 }
649 cfs_rq->nr_running--; 699 cfs_rq->nr_running--;
650 se->on_rq = 0;
651} 700}
652 701
702#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
703static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
704 int global_update)
705{
706 struct task_group *tg = cfs_rq->tg;
707 long load_avg;
708
709 load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
710 load_avg -= cfs_rq->load_contribution;
711
712 if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
713 atomic_add(load_avg, &tg->load_weight);
714 cfs_rq->load_contribution += load_avg;
715 }
716}
717
718static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
719{
720 u64 period = sysctl_sched_shares_window;
721 u64 now, delta;
722 unsigned long load = cfs_rq->load.weight;
723
724 if (!cfs_rq)
725 return;
726
727 now = rq_of(cfs_rq)->clock;
728 delta = now - cfs_rq->load_stamp;
729
730 /* truncate load history at 4 idle periods */
731 if (cfs_rq->load_stamp > cfs_rq->load_last &&
732 now - cfs_rq->load_last > 4 * period) {
733 cfs_rq->load_period = 0;
734 cfs_rq->load_avg = 0;
735 }
736
737 cfs_rq->load_stamp = now;
738 cfs_rq->load_unacc_exec_time = 0;
739 cfs_rq->load_period += delta;
740 if (load) {
741 cfs_rq->load_last = now;
742 cfs_rq->load_avg += delta * load;
743 }
744
745 /* consider updating load contribution on each fold or truncate */
746 if (global_update || cfs_rq->load_period > period
747 || !cfs_rq->load_period)
748 update_cfs_rq_load_contribution(cfs_rq, global_update);
749
750 while (cfs_rq->load_period > period) {
751 /*
752 * Inline assembly required to prevent the compiler
753 * optimising this loop into a divmod call.
754 * See __iter_div_u64_rem() for another example of this.
755 */
756 asm("" : "+rm" (cfs_rq->load_period));
757 cfs_rq->load_period /= 2;
758 cfs_rq->load_avg /= 2;
759 }
760
761 if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
762 list_del_leaf_cfs_rq(cfs_rq);
763}
764
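
update_cfs_load() above keeps load_avg as a folded sliding window: execution-time deltas accumulate into load_period, and every time the period outgrows sysctl_sched_shares_window both the period and the average are halved, so older windows decay geometrically. The odd asm("" : "+rm") exists only to stop gcc collapsing that halving loop into a 64-bit division. A standalone model of the fold, ignoring the idle-truncation and fixed-point details:

#include <stdio.h>

#define WINDOW 10000000ULL               /* 10 msec in nsec */

static unsigned long long load_period, load_avg;

static void account(unsigned long long delta_ns, unsigned long load)
{
        load_period += delta_ns;
        load_avg += delta_ns * load;

        while (load_period > WINDOW) {   /* fold: halve period and avg */
                load_period /= 2;
                load_avg /= 2;
        }
}

int main(void)
{
        account(8000000, 1024);          /* fits in the window */
        account(8000000, 1024);          /* overflows it: one fold */
        printf("period=%llu avg=%llu\n", load_period, load_avg);
        return 0;
}
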
765static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
766 unsigned long weight)
767{
768 if (se->on_rq) {
769 /* commit outstanding execution time */
770 if (cfs_rq->curr == se)
771 update_curr(cfs_rq);
772 account_entity_dequeue(cfs_rq, se);
773 }
774
775 update_load_set(&se->load, weight);
776
777 if (se->on_rq)
778 account_entity_enqueue(cfs_rq, se);
779}
780
781static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
782{
783 struct task_group *tg;
784 struct sched_entity *se;
785 long load_weight, load, shares;
786
787 if (!cfs_rq)
788 return;
789
790 tg = cfs_rq->tg;
791 se = tg->se[cpu_of(rq_of(cfs_rq))];
792 if (!se)
793 return;
794
795 load = cfs_rq->load.weight + weight_delta;
796
797 load_weight = atomic_read(&tg->load_weight);
798 load_weight -= cfs_rq->load_contribution;
799 load_weight += load;
800
801 shares = (tg->shares * load);
802 if (load_weight)
803 shares /= load_weight;
804
805 if (shares < MIN_SHARES)
806 shares = MIN_SHARES;
807 if (shares > tg->shares)
808 shares = tg->shares;
809
810 reweight_entity(cfs_rq_of(se), se, shares);
811}
812
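
The arithmetic in update_cfs_shares() above is a clamped proportion: this cpu receives tg->shares scaled by its runqueue's weight relative to the group-wide load_weight, floored at MIN_SHARES and capped at the group total. A worked standalone example (the MIN_SHARES value is illustrative, and the kernel additionally folds in load_contribution as shown above):

#include <stdio.h>

#define MIN_SHARES 2                     /* illustrative floor */

static long cpu_shares(long tg_shares, long cpu_load, long group_load)
{
        long shares = group_load ? tg_shares * cpu_load / group_load
                                 : tg_shares;

        if (shares < MIN_SHARES)
                shares = MIN_SHARES;
        if (shares > tg_shares)
                shares = tg_shares;
        return shares;
}

int main(void)
{
        /* a 1024-share group whose load sits 1/4 on this cpu */
        printf("%ld\n", cpu_shares(1024, 256, 1024));    /* 256 */
        return 0;
}
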
813static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
814{
815 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
816 update_cfs_load(cfs_rq, 0);
817 update_cfs_shares(cfs_rq, 0);
818 }
819}
820#else /* CONFIG_FAIR_GROUP_SCHED */
821static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
822{
823}
824
825static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
826{
827}
828
829static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
830{
831}
832#endif /* CONFIG_FAIR_GROUP_SCHED */
833
653static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) 834static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
654{ 835{
655#ifdef CONFIG_SCHEDSTATS 836#ifdef CONFIG_SCHEDSTATS
@@ -771,6 +952,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
771 * Update run-time statistics of the 'current'. 952 * Update run-time statistics of the 'current'.
772 */ 953 */
773 update_curr(cfs_rq); 954 update_curr(cfs_rq);
955 update_cfs_load(cfs_rq, 0);
956 update_cfs_shares(cfs_rq, se->load.weight);
774 account_entity_enqueue(cfs_rq, se); 957 account_entity_enqueue(cfs_rq, se);
775 958
776 if (flags & ENQUEUE_WAKEUP) { 959 if (flags & ENQUEUE_WAKEUP) {
@@ -782,6 +965,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
782 check_spread(cfs_rq, se); 965 check_spread(cfs_rq, se);
783 if (se != cfs_rq->curr) 966 if (se != cfs_rq->curr)
784 __enqueue_entity(cfs_rq, se); 967 __enqueue_entity(cfs_rq, se);
968 se->on_rq = 1;
969
970 if (cfs_rq->nr_running == 1)
971 list_add_leaf_cfs_rq(cfs_rq);
785} 972}
786 973
787static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 974static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -825,8 +1012,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
825 1012
826 if (se != cfs_rq->curr) 1013 if (se != cfs_rq->curr)
827 __dequeue_entity(cfs_rq, se); 1014 __dequeue_entity(cfs_rq, se);
1015 se->on_rq = 0;
1016 update_cfs_load(cfs_rq, 0);
828 account_entity_dequeue(cfs_rq, se); 1017 account_entity_dequeue(cfs_rq, se);
829 update_min_vruntime(cfs_rq); 1018 update_min_vruntime(cfs_rq);
1019 update_cfs_shares(cfs_rq, 0);
830 1020
831 /* 1021 /*
832 * Normalize the entity after updating the min_vruntime because the 1022 * Normalize the entity after updating the min_vruntime because the
@@ -955,6 +1145,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
955 */ 1145 */
956 update_curr(cfs_rq); 1146 update_curr(cfs_rq);
957 1147
1148 /*
1149 * Update share accounting for long-running entities.
1150 */
1151 update_entity_shares_tick(cfs_rq);
1152
958#ifdef CONFIG_SCHED_HRTICK 1153#ifdef CONFIG_SCHED_HRTICK
959 /* 1154 /*
960 * queued ticks are scheduled to match the slice, so don't bother 1155 * queued ticks are scheduled to match the slice, so don't bother
@@ -1055,6 +1250,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1055 flags = ENQUEUE_WAKEUP; 1250 flags = ENQUEUE_WAKEUP;
1056 } 1251 }
1057 1252
1253 for_each_sched_entity(se) {
1254 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1255
1256 update_cfs_load(cfs_rq, 0);
1257 update_cfs_shares(cfs_rq, 0);
1258 }
1259
1058 hrtick_update(rq); 1260 hrtick_update(rq);
1059} 1261}
1060 1262
@@ -1071,12 +1273,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1071 for_each_sched_entity(se) { 1273 for_each_sched_entity(se) {
1072 cfs_rq = cfs_rq_of(se); 1274 cfs_rq = cfs_rq_of(se);
1073 dequeue_entity(cfs_rq, se, flags); 1275 dequeue_entity(cfs_rq, se, flags);
1276
1074 /* Don't dequeue parent if it has other entities besides us */ 1277 /* Don't dequeue parent if it has other entities besides us */
1075 if (cfs_rq->load.weight) 1278 if (cfs_rq->load.weight)
1076 break; 1279 break;
1077 flags |= DEQUEUE_SLEEP; 1280 flags |= DEQUEUE_SLEEP;
1078 } 1281 }
1079 1282
1283 for_each_sched_entity(se) {
1284 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1285
1286 update_cfs_load(cfs_rq, 0);
1287 update_cfs_shares(cfs_rq, 0);
1288 }
1289
1080 hrtick_update(rq); 1290 hrtick_update(rq);
1081} 1291}
1082 1292
@@ -1143,51 +1353,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p)
1143 * Adding load to a group doesn't make a group heavier, but can cause movement 1353 * Adding load to a group doesn't make a group heavier, but can cause movement
1144 * of group shares between cpus. Assuming the shares were perfectly aligned one 1354 * of group shares between cpus. Assuming the shares were perfectly aligned one
1145 * can calculate the shift in shares. 1355 * can calculate the shift in shares.
1146 *
1147 * The problem is that perfectly aligning the shares is rather expensive, hence
1148 * we try to avoid doing that too often - see update_shares(), which ratelimits
1149 * this change.
1150 *
1151 * We compensate this by not only taking the current delta into account, but
1152 * also considering the delta between when the shares were last adjusted and
1153 * now.
1154 *
1155 * We still saw a performance dip, some tracing taught us that between
1156 * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
1157 * significantly. Therefore try to bias the error in direction of failing
1158 * the affine wakeup.
1159 *
1160 */ 1356 */
1161static long effective_load(struct task_group *tg, int cpu, 1357static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
1162 long wl, long wg)
1163{ 1358{
1164 struct sched_entity *se = tg->se[cpu]; 1359 struct sched_entity *se = tg->se[cpu];
1165 1360
1166 if (!tg->parent) 1361 if (!tg->parent)
1167 return wl; 1362 return wl;
1168 1363
1169 /*
1170 * By not taking the decrease of shares on the other cpu into
1171 * account our error leans towards reducing the affine wakeups.
1172 */
1173 if (!wl && sched_feat(ASYM_EFF_LOAD))
1174 return wl;
1175
1176 for_each_sched_entity(se) { 1364 for_each_sched_entity(se) {
1177 long S, rw, s, a, b; 1365 long S, rw, s, a, b;
1178 long more_w;
1179
1180 /*
1181 * Instead of using this increment, also add the difference
1182 * between when the shares were last updated and now.
1183 */
1184 more_w = se->my_q->load.weight - se->my_q->rq_weight;
1185 wl += more_w;
1186 wg += more_w;
1187 1366
1188 S = se->my_q->tg->shares; 1367 S = se->my_q->tg->shares;
1189 s = se->my_q->shares; 1368 s = se->load.weight;
1190 rw = se->my_q->rq_weight; 1369 rw = se->my_q->load.weight;
1191 1370
1192 a = S*(rw + wl); 1371 a = S*(rw + wl);
1193 b = S*rw + s*wg; 1372 b = S*rw + s*wg;
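
In effective_load() above, a = S*(rw + wl) and b = S*rw + s*wg compare the group's weighted state after and before adding wl of weight on this cpu and wg group-wide. The rest of the loop body falls outside this hunk, but presumably turns the pair into a per-level delta on the order of s*(a - b)/b and carries it up the hierarchy. A numeric sketch of those quantities, under that assumption:

#include <stdio.h>

int main(void)
{
        long S = 1024, s = 512, rw = 1024, wl = 256, wg = 256;
        long a = S * (rw + wl);          /* weighted state after */
        long b = S * rw + s * wg;        /* weighted state before */

        /* assumed shape of the propagated delta: s*(a-b)/b */
        printf("delta ~ %ld\n", b ? s * (a - b) / b : 0);
        return 0;
}
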
@@ -1508,23 +1687,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
1508 sd = tmp; 1687 sd = tmp;
1509 } 1688 }
1510 1689
1511#ifdef CONFIG_FAIR_GROUP_SCHED
1512 if (sched_feat(LB_SHARES_UPDATE)) {
1513 /*
1514 * Pick the largest domain to update shares over
1515 */
1516 tmp = sd;
1517 if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
1518 tmp = affine_sd;
1519
1520 if (tmp) {
1521 raw_spin_unlock(&rq->lock);
1522 update_shares(tmp);
1523 raw_spin_lock(&rq->lock);
1524 }
1525 }
1526#endif
1527
1528 if (affine_sd) { 1690 if (affine_sd) {
1529 if (cpu == prev_cpu || wake_affine(affine_sd, p, sync)) 1691 if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
1530 return select_idle_sibling(p, cpu); 1692 return select_idle_sibling(p, cpu);
@@ -1909,6 +2071,48 @@ out:
1909} 2071}
1910 2072
1911#ifdef CONFIG_FAIR_GROUP_SCHED 2073#ifdef CONFIG_FAIR_GROUP_SCHED
2074/*
2075 * update tg->load_weight by folding this cpu's load_avg
2076 */
2077static int update_shares_cpu(struct task_group *tg, int cpu)
2078{
2079 struct cfs_rq *cfs_rq;
2080 unsigned long flags;
2081 struct rq *rq;
2082
2083 if (!tg->se[cpu])
2084 return 0;
2085
2086 rq = cpu_rq(cpu);
2087 cfs_rq = tg->cfs_rq[cpu];
2088
2089 raw_spin_lock_irqsave(&rq->lock, flags);
2090
2091 update_rq_clock(rq);
2092 update_cfs_load(cfs_rq, 1);
2093
2094 /*
2095 * We need to update shares after updating tg->load_weight in
2096 * order to adjust the weight of groups with long running tasks.
2097 */
2098 update_cfs_shares(cfs_rq, 0);
2099
2100 raw_spin_unlock_irqrestore(&rq->lock, flags);
2101
2102 return 0;
2103}
2104
2105static void update_shares(int cpu)
2106{
2107 struct cfs_rq *cfs_rq;
2108 struct rq *rq = cpu_rq(cpu);
2109
2110 rcu_read_lock();
2111 for_each_leaf_cfs_rq(rq, cfs_rq)
2112 update_shares_cpu(cfs_rq->tg, cpu);
2113 rcu_read_unlock();
2114}
2115
1912static unsigned long 2116static unsigned long
1913load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, 2117load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1914 unsigned long max_load_move, 2118 unsigned long max_load_move,
@@ -1956,6 +2160,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1956 return max_load_move - rem_load_move; 2160 return max_load_move - rem_load_move;
1957} 2161}
1958#else 2162#else
2163static inline void update_shares(int cpu)
2164{
2165}
2166
1959static unsigned long 2167static unsigned long
1960load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, 2168load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1961 unsigned long max_load_move, 2169 unsigned long max_load_move,
@@ -3032,7 +3240,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3032 schedstat_inc(sd, lb_count[idle]); 3240 schedstat_inc(sd, lb_count[idle]);
3033 3241
3034redo: 3242redo:
3035 update_shares(sd);
3036 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, 3243 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
3037 cpus, balance); 3244 cpus, balance);
3038 3245
@@ -3174,8 +3381,6 @@ out_one_pinned:
3174 else 3381 else
3175 ld_moved = 0; 3382 ld_moved = 0;
3176out: 3383out:
3177 if (ld_moved)
3178 update_shares(sd);
3179 return ld_moved; 3384 return ld_moved;
3180} 3385}
3181 3386
@@ -3199,6 +3404,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3199 */ 3404 */
3200 raw_spin_unlock(&this_rq->lock); 3405 raw_spin_unlock(&this_rq->lock);
3201 3406
3407 update_shares(this_cpu);
3202 for_each_domain(this_cpu, sd) { 3408 for_each_domain(this_cpu, sd) {
3203 unsigned long interval; 3409 unsigned long interval;
3204 int balance = 1; 3410 int balance = 1;
@@ -3569,6 +3775,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3569 int update_next_balance = 0; 3775 int update_next_balance = 0;
3570 int need_serialize; 3776 int need_serialize;
3571 3777
3778 update_shares(cpu);
3779
3572 for_each_domain(cpu, sd) { 3780 for_each_domain(cpu, sd) {
3573 if (!(sd->flags & SD_LOAD_BALANCE)) 3781 if (!(sd->flags & SD_LOAD_BALANCE))
3574 continue; 3782 continue;
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 185f920ec1a2..68e69acc29b9 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0)
52SCHED_FEAT(HRTICK, 0) 52SCHED_FEAT(HRTICK, 0)
53SCHED_FEAT(DOUBLE_TICK, 0) 53SCHED_FEAT(DOUBLE_TICK, 0)
54SCHED_FEAT(LB_BIAS, 1) 54SCHED_FEAT(LB_BIAS, 1)
55SCHED_FEAT(LB_SHARES_UPDATE, 1)
56SCHED_FEAT(ASYM_EFF_LOAD, 1)
57 55
58/* 56/*
59 * Spin-wait on mutex acquisition when the mutex owner is running on 57 * Spin-wait on mutex acquisition when the mutex owner is running on
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index bea7d79f7e9c..c914ec747ca6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
183 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); 183 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
184} 184}
185 185
186static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
187{
188 list_add_rcu(&rt_rq->leaf_rt_rq_list,
189 &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
190}
191
192static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
193{
194 list_del_rcu(&rt_rq->leaf_rt_rq_list);
195}
196
186#define for_each_leaf_rt_rq(rt_rq, rq) \ 197#define for_each_leaf_rt_rq(rt_rq, rq) \
187 list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) 198 list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
188 199
@@ -276,6 +287,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
276 return ktime_to_ns(def_rt_bandwidth.rt_period); 287 return ktime_to_ns(def_rt_bandwidth.rt_period);
277} 288}
278 289
290static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
291{
292}
293
294static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
295{
296}
297
279#define for_each_leaf_rt_rq(rt_rq, rq) \ 298#define for_each_leaf_rt_rq(rt_rq, rq) \
280 for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) 299 for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
281 300
@@ -825,6 +844,9 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
825 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 844 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
826 return; 845 return;
827 846
847 if (!rt_rq->rt_nr_running)
848 list_add_leaf_rt_rq(rt_rq);
849
828 if (head) 850 if (head)
829 list_add(&rt_se->run_list, queue); 851 list_add(&rt_se->run_list, queue);
830 else 852 else
@@ -844,6 +866,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
844 __clear_bit(rt_se_prio(rt_se), array->bitmap); 866 __clear_bit(rt_se_prio(rt_se), array->bitmap);
845 867
846 dec_rt_tasks(rt_se, rt_rq); 868 dec_rt_tasks(rt_se, rt_rq);
869 if (!rt_rq->rt_nr_running)
870 list_del_leaf_rt_rq(rt_rq);
847} 871}
848 872
849/* 873/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 18f4be0d5fe0..d4d918a91881 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -853,7 +853,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
853 cpumask_any(cpu_online_mask)); 853 cpumask_any(cpu_online_mask));
854 case CPU_DEAD: 854 case CPU_DEAD:
855 case CPU_DEAD_FROZEN: { 855 case CPU_DEAD_FROZEN: {
856 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 856 static struct sched_param param = {
857 .sched_priority = MAX_RT_PRIO-1
858 };
857 859
858 p = per_cpu(ksoftirqd, hotcpu); 860 p = per_cpu(ksoftirqd, hotcpu);
859 per_cpu(ksoftirqd, hotcpu) = NULL; 861 per_cpu(ksoftirqd, hotcpu) = NULL;
diff --git a/kernel/srcu.c b/kernel/srcu.c
index c71e07500536..98d8c1e80edb 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -31,6 +31,7 @@
31#include <linux/rcupdate.h> 31#include <linux/rcupdate.h>
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/smp.h> 33#include <linux/smp.h>
34#include <linux/delay.h>
34#include <linux/srcu.h> 35#include <linux/srcu.h>
35 36
36static int init_srcu_struct_fields(struct srcu_struct *sp) 37static int init_srcu_struct_fields(struct srcu_struct *sp)
@@ -203,9 +204,14 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
203 * all srcu_read_lock() calls using the old counters have completed. 204 * all srcu_read_lock() calls using the old counters have completed.
204 * Their corresponding critical sections might well be still 205 * Their corresponding critical sections might well be still
205 * executing, but the srcu_read_lock() primitives themselves 206 * executing, but the srcu_read_lock() primitives themselves
206 * will have finished executing. 207 * will have finished executing. We initially give readers
208 * an arbitrarily chosen 10 microseconds to get out of their
209 * SRCU read-side critical sections, then loop waiting 1/HZ
210 * seconds per iteration.
207 */ 211 */
208 212
213 if (srcu_readers_active_idx(sp, idx))
214 udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY);
209 while (srcu_readers_active_idx(sp, idx)) 215 while (srcu_readers_active_idx(sp, idx))
210 schedule_timeout_interruptible(1); 216 schedule_timeout_interruptible(1);
211 217
diff --git a/kernel/sys.c b/kernel/sys.c
index 7f5a0cd296a9..2745dcdb6c6c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid)
1080 err = session; 1080 err = session;
1081out: 1081out:
1082 write_unlock_irq(&tasklist_lock); 1082 write_unlock_irq(&tasklist_lock);
1083 if (err > 0) 1083 if (err > 0) {
1084 proc_sid_connector(group_leader); 1084 proc_sid_connector(group_leader);
1085 sched_autogroup_create_attach(group_leader);
1086 }
1085 return err; 1087 return err;
1086} 1088}
1087 1089
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5abfa1518554..ae5cbb1e3ced 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns; /* 0 usecs */
259static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ 259static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
260static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; 260static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
261static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; 261static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
262static int min_sched_shares_ratelimit = 100000; /* 100 usec */
263static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
264#endif 262#endif
265 263
266#ifdef CONFIG_COMPACTION 264#ifdef CONFIG_COMPACTION
@@ -305,15 +303,6 @@ static struct ctl_table kern_table[] = {
305 .extra2 = &max_wakeup_granularity_ns, 303 .extra2 = &max_wakeup_granularity_ns,
306 }, 304 },
307 { 305 {
308 .procname = "sched_shares_ratelimit",
309 .data = &sysctl_sched_shares_ratelimit,
310 .maxlen = sizeof(unsigned int),
311 .mode = 0644,
312 .proc_handler = sched_proc_update_handler,
313 .extra1 = &min_sched_shares_ratelimit,
314 .extra2 = &max_sched_shares_ratelimit,
315 },
316 {
317 .procname = "sched_tunable_scaling", 306 .procname = "sched_tunable_scaling",
318 .data = &sysctl_sched_tunable_scaling, 307 .data = &sysctl_sched_tunable_scaling,
319 .maxlen = sizeof(enum sched_tunable_scaling), 308 .maxlen = sizeof(enum sched_tunable_scaling),
@@ -323,14 +312,6 @@ static struct ctl_table kern_table[] = {
323 .extra2 = &max_sched_tunable_scaling, 312 .extra2 = &max_sched_tunable_scaling,
324 }, 313 },
325 { 314 {
326 .procname = "sched_shares_thresh",
327 .data = &sysctl_sched_shares_thresh,
328 .maxlen = sizeof(unsigned int),
329 .mode = 0644,
330 .proc_handler = proc_dointvec_minmax,
331 .extra1 = &zero,
332 },
333 {
334 .procname = "sched_migration_cost", 315 .procname = "sched_migration_cost",
335 .data = &sysctl_sched_migration_cost, 316 .data = &sysctl_sched_migration_cost,
336 .maxlen = sizeof(unsigned int), 317 .maxlen = sizeof(unsigned int),
@@ -352,6 +333,13 @@ static struct ctl_table kern_table[] = {
352 .proc_handler = proc_dointvec, 333 .proc_handler = proc_dointvec,
353 }, 334 },
354 { 335 {
336 .procname = "sched_shares_window",
337 .data = &sysctl_sched_shares_window,
338 .maxlen = sizeof(unsigned int),
339 .mode = 0644,
340 .proc_handler = proc_dointvec,
341 },
342 {
355 .procname = "timer_migration", 343 .procname = "timer_migration",
356 .data = &sysctl_timer_migration, 344 .data = &sysctl_timer_migration,
357 .maxlen = sizeof(unsigned int), 345 .maxlen = sizeof(unsigned int),
@@ -382,6 +370,17 @@ static struct ctl_table kern_table[] = {
382 .mode = 0644, 370 .mode = 0644,
383 .proc_handler = proc_dointvec, 371 .proc_handler = proc_dointvec,
384 }, 372 },
373#ifdef CONFIG_SCHED_AUTOGROUP
374 {
375 .procname = "sched_autogroup_enabled",
376 .data = &sysctl_sched_autogroup_enabled,
377 .maxlen = sizeof(unsigned int),
378 .mode = 0644,
379 .proc_handler = proc_dointvec,
380 .extra1 = &zero,
381 .extra2 = &one,
382 },
383#endif
385#ifdef CONFIG_PROVE_LOCKING 384#ifdef CONFIG_PROVE_LOCKING
386 { 385 {
387 .procname = "prove_locking", 386 .procname = "prove_locking",
@@ -745,21 +744,21 @@ static struct ctl_table kern_table[] = {
745 .extra1 = &zero, 744 .extra1 = &zero,
746 .extra2 = &one, 745 .extra2 = &one,
747 }, 746 },
748#endif
749#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
750 { 747 {
751 .procname = "unknown_nmi_panic", 748 .procname = "nmi_watchdog",
752 .data = &unknown_nmi_panic, 749 .data = &watchdog_enabled,
753 .maxlen = sizeof (int), 750 .maxlen = sizeof (int),
754 .mode = 0644, 751 .mode = 0644,
755 .proc_handler = proc_dointvec, 752 .proc_handler = proc_dowatchdog_enabled,
756 }, 753 },
754#endif
755#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
757 { 756 {
758 .procname = "nmi_watchdog", 757 .procname = "unknown_nmi_panic",
759 .data = &nmi_watchdog_enabled, 758 .data = &unknown_nmi_panic,
760 .maxlen = sizeof (int), 759 .maxlen = sizeof (int),
761 .mode = 0644, 760 .mode = 0644,
762 .proc_handler = proc_nmi_enabled, 761 .proc_handler = proc_dointvec,
763 }, 762 },
764#endif 763#endif
765#if defined(CONFIG_X86) 764#if defined(CONFIG_X86)
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 1357c5786064..4b2545a136ff 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -136,7 +136,6 @@ static const struct bin_table bin_kern_table[] = {
136 { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" }, 136 { CTL_INT, KERN_IA64_UNALIGNED, "ignore-unaligned-usertrap" },
137 { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, 137 { CTL_INT, KERN_COMPAT_LOG, "compat-log" },
138 { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, 138 { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" },
139 { CTL_INT, KERN_NMI_WATCHDOG, "nmi_watchdog" },
140 { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, 139 { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" },
141 {} 140 {}
142}; 141};
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index c8231fb15708..3308fd7f1b52 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask)
349 return ret; 349 return ret;
350} 350}
351 351
352#ifdef CONFIG_IA64
353#define TASKSTATS_NEEDS_PADDING 1
354#endif
355
352static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) 356static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
353{ 357{
354 struct nlattr *na, *ret; 358 struct nlattr *na, *ret;
355 int aggr; 359 int aggr;
356 360
357 /* If we don't pad, we end up with alignment on a 4 byte boundary.
358 * This causes lots of runtime warnings on systems requiring 8 byte
359 * alignment */
360 u32 pids[2] = { pid, 0 };
361 int pid_size = ALIGN(sizeof(pid), sizeof(long));
362
363 aggr = (type == TASKSTATS_TYPE_PID) 361 aggr = (type == TASKSTATS_TYPE_PID)
364 ? TASKSTATS_TYPE_AGGR_PID 362 ? TASKSTATS_TYPE_AGGR_PID
365 : TASKSTATS_TYPE_AGGR_TGID; 363 : TASKSTATS_TYPE_AGGR_TGID;
366 364
365 /*
366 * The taskstats structure is internally aligned on 8 byte
 367 * boundaries but the layout of the aggregate reply, with
368 * two NLA headers and the pid (each 4 bytes), actually
369 * force the entire structure to be unaligned. This causes
370 * the kernel to issue unaligned access warnings on some
371 * architectures like ia64. Unfortunately, some software out there
372 * doesn't properly unroll the NLA packet and assumes that the start
373 * of the taskstats structure will always be 20 bytes from the start
374 * of the netlink payload. Aligning the start of the taskstats
375 * structure breaks this software, which we don't want. So, for now
376 * the alignment only happens on architectures that require it
377 * and those users will have to update to fixed versions of those
378 * packages. Space is reserved in the packet only when needed.
379 * This ifdef should be removed in several years e.g. 2012 once
380 * we can be confident that fixed versions are installed on most
381 * systems. We add the padding before the aggregate since the
382 * aggregate is already a defined type.
383 */
384#ifdef TASKSTATS_NEEDS_PADDING
385 if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
386 goto err;
387#endif
367 na = nla_nest_start(skb, aggr); 388 na = nla_nest_start(skb, aggr);
368 if (!na) 389 if (!na)
369 goto err; 390 goto err;
370 if (nla_put(skb, type, pid_size, pids) < 0) 391
392 if (nla_put(skb, type, sizeof(pid), &pid) < 0)
371 goto err; 393 goto err;
372 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); 394 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
373 if (!ret) 395 if (!ret)
@@ -456,6 +478,18 @@ out:
456 return rc; 478 return rc;
457} 479}
458 480
481static size_t taskstats_packet_size(void)
482{
483 size_t size;
484
485 size = nla_total_size(sizeof(u32)) +
486 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
487#ifdef TASKSTATS_NEEDS_PADDING
488 size += nla_total_size(0); /* Padding for alignment */
489#endif
490 return size;
491}
492
459static int cmd_attr_pid(struct genl_info *info) 493static int cmd_attr_pid(struct genl_info *info)
460{ 494{
461 struct taskstats *stats; 495 struct taskstats *stats;
@@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info)
464 u32 pid; 498 u32 pid;
465 int rc; 499 int rc;
466 500
467 size = nla_total_size(sizeof(u32)) + 501 size = taskstats_packet_size();
468 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
469 502
470 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 503 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
471 if (rc < 0) 504 if (rc < 0)
@@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info)
494 u32 tgid; 527 u32 tgid;
495 int rc; 528 int rc;
496 529
497 size = nla_total_size(sizeof(u32)) + 530 size = taskstats_packet_size();
498 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
499 531
500 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); 532 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
501 if (rc < 0) 533 if (rc < 0)
@@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
570 /* 602 /*
571 * Size includes space for nested attributes 603 * Size includes space for nested attributes
572 */ 604 */
573 size = nla_total_size(sizeof(u32)) + 605 size = taskstats_packet_size();
574 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
575 606
576 is_thread_group = !!taskstats_tgid_alloc(tsk); 607 is_thread_group = !!taskstats_tgid_alloc(tsk);
577 if (is_thread_group) { 608 if (is_thread_group) {
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index ac38fbb176cc..a9ae369925ce 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -21,6 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/math64.h> 23#include <linux/math64.h>
24#include <linux/kernel.h>
24 25
25/* 26/*
26 * fixed point arithmetic scale factor for skew 27 * fixed point arithmetic scale factor for skew
@@ -57,11 +58,11 @@ int timecompare_offset(struct timecompare *sync,
57 int index; 58 int index;
58 int num_samples = sync->num_samples; 59 int num_samples = sync->num_samples;
59 60
60 if (num_samples > sizeof(buffer)/sizeof(buffer[0])) { 61 if (num_samples > ARRAY_SIZE(buffer)) {
61 samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC); 62 samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC);
62 if (!samples) { 63 if (!samples) {
63 samples = buffer; 64 samples = buffer;
64 num_samples = sizeof(buffer)/sizeof(buffer[0]); 65 num_samples = ARRAY_SIZE(buffer);
65 } 66 }
66 } else { 67 } else {
67 samples = buffer; 68 samples = buffer;
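For reference, ARRAY_SIZE() is the <linux/kernel.h> helper this hunk switches to; its plain-C core (the kernel version additionally build-checks that its argument is a true array rather than a pointer) is:

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))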
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 49010d822f72..5bb86da82003 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -32,6 +32,8 @@ struct timekeeper {
32 cycle_t cycle_interval; 32 cycle_t cycle_interval;
33 /* Number of clock shifted nano seconds in one NTP interval. */ 33 /* Number of clock shifted nano seconds in one NTP interval. */
34 u64 xtime_interval; 34 u64 xtime_interval;
35 /* shifted nano seconds left over when rounding cycle_interval */
36 s64 xtime_remainder;
35 /* Raw nano seconds accumulated per NTP interval. */ 37 /* Raw nano seconds accumulated per NTP interval. */
36 u32 raw_interval; 38 u32 raw_interval;
37 39
@@ -62,7 +64,7 @@ struct timekeeper timekeeper;
62static void timekeeper_setup_internals(struct clocksource *clock) 64static void timekeeper_setup_internals(struct clocksource *clock)
63{ 65{
64 cycle_t interval; 66 cycle_t interval;
65 u64 tmp; 67 u64 tmp, ntpinterval;
66 68
67 timekeeper.clock = clock; 69 timekeeper.clock = clock;
68 clock->cycle_last = clock->read(clock); 70 clock->cycle_last = clock->read(clock);
@@ -70,6 +72,7 @@ static void timekeeper_setup_internals(struct clocksource *clock)
70 /* Do the ns -> cycle conversion first, using original mult */ 72 /* Do the ns -> cycle conversion first, using original mult */
71 tmp = NTP_INTERVAL_LENGTH; 73 tmp = NTP_INTERVAL_LENGTH;
72 tmp <<= clock->shift; 74 tmp <<= clock->shift;
75 ntpinterval = tmp;
73 tmp += clock->mult/2; 76 tmp += clock->mult/2;
74 do_div(tmp, clock->mult); 77 do_div(tmp, clock->mult);
75 if (tmp == 0) 78 if (tmp == 0)
@@ -80,6 +83,7 @@ static void timekeeper_setup_internals(struct clocksource *clock)
80 83
81 /* Go back from cycles -> shifted ns */ 84 /* Go back from cycles -> shifted ns */
82 timekeeper.xtime_interval = (u64) interval * clock->mult; 85 timekeeper.xtime_interval = (u64) interval * clock->mult;
86 timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval;
83 timekeeper.raw_interval = 87 timekeeper.raw_interval =
84 ((u64) interval * clock->mult) >> clock->shift; 88 ((u64) interval * clock->mult) >> clock->shift;
85 89
@@ -719,7 +723,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
719 723
720 /* Accumulate error between NTP and clock interval */ 724 /* Accumulate error between NTP and clock interval */
721 timekeeper.ntp_error += tick_length << shift; 725 timekeeper.ntp_error += tick_length << shift;
722 timekeeper.ntp_error -= timekeeper.xtime_interval << 726 timekeeper.ntp_error -=
727 (timekeeper.xtime_interval + timekeeper.xtime_remainder) <<
723 (timekeeper.ntp_error_shift + shift); 728 (timekeeper.ntp_error_shift + shift);
724 729
725 return offset; 730 return offset;
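A note on why the new term fixes the accounting: timekeeper_setup_internals() rounds the NTP interval to a whole number of clocksource cycles, and xtime_remainder captures the (possibly negative) shifted nanoseconds lost to that rounding, so:

/*
 * In shifted ns:  ntpinterval == xtime_interval + xtime_remainder
 *
 * Subtracting (xtime_interval + xtime_remainder) from the accumulated
 * tick_length therefore cancels the rounding, which previously showed
 * up as a slow, systematic ntp_error drift.
 */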
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ab8f5e33fa92..32a19f9397fc 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -79,26 +79,26 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
79{ 79{
80 struct hrtimer *timer, tmp; 80 struct hrtimer *timer, tmp;
81 unsigned long next = 0, i; 81 unsigned long next = 0, i;
82 struct rb_node *curr; 82 struct timerqueue_node *curr;
83 unsigned long flags; 83 unsigned long flags;
84 84
85next_one: 85next_one:
86 i = 0; 86 i = 0;
87 raw_spin_lock_irqsave(&base->cpu_base->lock, flags); 87 raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
88 88
89 curr = base->first; 89 curr = timerqueue_getnext(&base->active);
90 /* 90 /*
91 * Crude but we have to do this O(N*N) thing, because 91 * Crude but we have to do this O(N*N) thing, because
92 * we have to unlock the base when printing: 92 * we have to unlock the base when printing:
93 */ 93 */
94 while (curr && i < next) { 94 while (curr && i < next) {
95 curr = rb_next(curr); 95 curr = timerqueue_iterate_next(curr);
96 i++; 96 i++;
97 } 97 }
98 98
99 if (curr) { 99 if (curr) {
100 100
101 timer = rb_entry(curr, struct hrtimer, node); 101 timer = container_of(curr, struct hrtimer, node);
102 tmp = *timer; 102 tmp = *timer;
103 raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags); 103 raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
104 104
diff --git a/kernel/timer.c b/kernel/timer.c
index 353b9227c2ec..43ca9936f2d0 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases;
88EXPORT_SYMBOL(boot_tvec_bases); 88EXPORT_SYMBOL(boot_tvec_bases);
89static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; 89static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
90 90
91/*
92 * Note that all tvec_bases are 2 byte aligned and lower bit of
93 * base in timer_list is guaranteed to be zero. Use the LSB to
94 * indicate whether the timer is deferrable.
95 *
96 * A deferrable timer will work normally when the system is busy, but
97 * will not cause a CPU to come out of idle just to service it; instead,
98 * the timer will be serviced when the CPU eventually wakes up with a
99 * subsequent non-deferrable timer.
100 */
101#define TBASE_DEFERRABLE_FLAG (0x1)
102
103/* Functions below help us manage 'deferrable' flag */ 91/* Functions below help us manage 'deferrable' flag */
104static inline unsigned int tbase_get_deferrable(struct tvec_base *base) 92static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
105{ 93{
@@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
113 101
114static inline void timer_set_deferrable(struct timer_list *timer) 102static inline void timer_set_deferrable(struct timer_list *timer)
115{ 103{
116 timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | 104 timer->base = TBASE_MAKE_DEFERRED(timer->base);
117 TBASE_DEFERRABLE_FLAG));
118} 105}
119 106
120static inline void 107static inline void
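The deleted block comment documents the pointer-tagging trick that TBASE_MAKE_DEFERRED() (now defined in a header, outside this hunk) still relies on: tvec_base pointers are at least 2-byte aligned, so bit 0 is free to carry the deferrable flag. A minimal sketch of the idiom, mirroring this file's own helpers:

#define TBASE_DEFERRABLE_FLAG (0x1UL)

/* Tag bit 0: mark this timer's base pointer as deferrable. */
static inline struct tvec_base *tbase_make_deferred(struct tvec_base *base)
{
	return (struct tvec_base *)((unsigned long)base | TBASE_DEFERRABLE_FLAG);
}

/* Read the tag without disturbing the pointer. */
static inline unsigned long tbase_get_deferrable(struct tvec_base *base)
{
	return (unsigned long)base & TBASE_DEFERRABLE_FLAG;
}

/* Strip the tag to recover the real, dereferenceable base pointer. */
static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
	return (struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG);
}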
@@ -343,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
343} 330}
344EXPORT_SYMBOL_GPL(set_timer_slack); 331EXPORT_SYMBOL_GPL(set_timer_slack);
345 332
346
347static inline void set_running_timer(struct tvec_base *base,
348 struct timer_list *timer)
349{
350#ifdef CONFIG_SMP
351 base->running_timer = timer;
352#endif
353}
354
355static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) 333static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
356{ 334{
357 unsigned long expires = timer->expires; 335 unsigned long expires = timer->expires;
@@ -936,15 +914,12 @@ int del_timer(struct timer_list *timer)
936} 914}
937EXPORT_SYMBOL(del_timer); 915EXPORT_SYMBOL(del_timer);
938 916
939#ifdef CONFIG_SMP
940/** 917/**
941 * try_to_del_timer_sync - Try to deactivate a timer 918 * try_to_del_timer_sync - Try to deactivate a timer
 942 * @timer: timer to deactivate 919 * @timer: timer to deactivate
943 * 920 *
944 * This function tries to deactivate a timer. Upon successful (ret >= 0) 921 * This function tries to deactivate a timer. Upon successful (ret >= 0)
945 * exit the timer is not queued and the handler is not running on any CPU. 922 * exit the timer is not queued and the handler is not running on any CPU.
946 *
947 * It must not be called from interrupt contexts.
948 */ 923 */
949int try_to_del_timer_sync(struct timer_list *timer) 924int try_to_del_timer_sync(struct timer_list *timer)
950{ 925{
@@ -973,6 +948,7 @@ out:
973} 948}
974EXPORT_SYMBOL(try_to_del_timer_sync); 949EXPORT_SYMBOL(try_to_del_timer_sync);
975 950
951#ifdef CONFIG_SMP
976/** 952/**
977 * del_timer_sync - deactivate a timer and wait for the handler to finish. 953 * del_timer_sync - deactivate a timer and wait for the handler to finish.
978 * @timer: the timer to be deactivated 954 * @timer: the timer to be deactivated
@@ -983,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
983 * 959 *
984 * Synchronization rules: Callers must prevent restarting of the timer, 960 * Synchronization rules: Callers must prevent restarting of the timer,
985 * otherwise this function is meaningless. It must not be called from 961 * otherwise this function is meaningless. It must not be called from
986 * interrupt contexts. The caller must not hold locks which would prevent 962 * hardirq contexts. The caller must not hold locks which would prevent
987 * completion of the timer's handler. The timer's handler must not call 963 * completion of the timer's handler. The timer's handler must not call
988 * add_timer_on(). Upon exit the timer is not queued and the handler is 964 * add_timer_on(). Upon exit the timer is not queued and the handler is
989 * not running on any CPU. 965 * not running on any CPU.
@@ -993,14 +969,16 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
993int del_timer_sync(struct timer_list *timer) 969int del_timer_sync(struct timer_list *timer)
994{ 970{
995#ifdef CONFIG_LOCKDEP 971#ifdef CONFIG_LOCKDEP
996 unsigned long flags; 972 local_bh_disable();
997
998 local_irq_save(flags);
999 lock_map_acquire(&timer->lockdep_map); 973 lock_map_acquire(&timer->lockdep_map);
1000 lock_map_release(&timer->lockdep_map); 974 lock_map_release(&timer->lockdep_map);
1001 local_irq_restore(flags); 975 local_bh_enable();
1002#endif 976#endif
1003 977 /*
978 * don't use it in hardirq context, because it
979 * could lead to deadlock.
980 */
981 WARN_ON(in_irq());
1004 for (;;) { 982 for (;;) {
1005 int ret = try_to_del_timer_sync(timer); 983 int ret = try_to_del_timer_sync(timer);
1006 if (ret >= 0) 984 if (ret >= 0)
@@ -1111,7 +1089,7 @@ static inline void __run_timers(struct tvec_base *base)
1111 1089
1112 timer_stats_account_timer(timer); 1090 timer_stats_account_timer(timer);
1113 1091
1114 set_running_timer(base, timer); 1092 base->running_timer = timer;
1115 detach_timer(timer, 1); 1093 detach_timer(timer, 1);
1116 1094
1117 spin_unlock_irq(&base->lock); 1095 spin_unlock_irq(&base->lock);
@@ -1119,7 +1097,7 @@ static inline void __run_timers(struct tvec_base *base)
1119 spin_lock_irq(&base->lock); 1097 spin_lock_irq(&base->lock);
1120 } 1098 }
1121 } 1099 }
1122 set_running_timer(base, NULL); 1100 base->running_timer = NULL;
1123 spin_unlock_irq(&base->lock); 1101 spin_unlock_irq(&base->lock);
1124} 1102}
1125 1103
@@ -1249,7 +1227,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
1249 */ 1227 */
1250unsigned long get_next_timer_interrupt(unsigned long now) 1228unsigned long get_next_timer_interrupt(unsigned long now)
1251{ 1229{
1252 struct tvec_base *base = __get_cpu_var(tvec_bases); 1230 struct tvec_base *base = __this_cpu_read(tvec_bases);
1253 unsigned long expires; 1231 unsigned long expires;
1254 1232
1255 /* 1233 /*
@@ -1298,7 +1276,7 @@ void update_process_times(int user_tick)
1298 */ 1276 */
1299static void run_timer_softirq(struct softirq_action *h) 1277static void run_timer_softirq(struct softirq_action *h)
1300{ 1278{
1301 struct tvec_base *base = __get_cpu_var(tvec_bases); 1279 struct tvec_base *base = __this_cpu_read(tvec_bases);
1302 1280
1303 hrtimer_run_pending(); 1281 hrtimer_run_pending();
1304 1282
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index ea37e2ff4164..14674dce77a6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -69,6 +69,21 @@ config EVENT_TRACING
69 select CONTEXT_SWITCH_TRACER 69 select CONTEXT_SWITCH_TRACER
70 bool 70 bool
71 71
72config EVENT_POWER_TRACING_DEPRECATED
73 depends on EVENT_TRACING
74 bool "Deprecated power event trace API, to be removed"
75 default y
76 help
77 Provides old power event types:
78 C-state/idle accounting events:
79 power:power_start
80 power:power_end
81 and old cpufreq accounting event:
82 power:power_frequency
83 This is for userspace compatibility
84 and will vanish after 5 kernel iterations,
85 namely 2.6.41.
86
72config CONTEXT_SWITCH_TRACER 87config CONTEXT_SWITCH_TRACER
73 bool 88 bool
74 89
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index a22582a06161..f55fcf61b223 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -13,5 +13,8 @@
13#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
14#include <trace/events/power.h> 14#include <trace/events/power.h>
15 15
16EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 16#ifdef EVENT_POWER_TRACING_DEPRECATED
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18#endif
19EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
17 20
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9ed509a015d8..bd1c35a4fbcc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3853,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3853 3853
3854 /* Need to copy one event at a time */ 3854 /* Need to copy one event at a time */
3855 do { 3855 do {
3856 /* We need the size of one event, because
3857 * rb_advance_reader only advances by one event,
3858 * whereas rb_event_ts_length may include the size of
3859 * one or two events.
3860 * We have already ensured there's enough space if this
3861 * is a time extend. */
3862 size = rb_event_length(event);
3856 memcpy(bpage->data + pos, rpage->data + rpos, size); 3863 memcpy(bpage->data + pos, rpage->data + rpos, size);
3857 3864
3858 len -= size; 3865 len -= size;
@@ -3867,7 +3874,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3867 event = rb_reader_event(cpu_buffer); 3874 event = rb_reader_event(cpu_buffer);
3868 /* Always keep the time extend and data together */ 3875 /* Always keep the time extend and data together */
3869 size = rb_event_ts_length(event); 3876 size = rb_event_ts_length(event);
3870 } while (len > size); 3877 } while (len >= size);
3871 3878
3872 /* update bpage */ 3879 /* update bpage */
3873 local_set(&bpage->commit, pos); 3880 local_set(&bpage->commit, pos);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 39c059ca670e..19a359d5e6d5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
21/* Count the events in use (per event id, not per instance) */ 21/* Count the events in use (per event id, not per instance) */
22static int total_ref_count; 22static int total_ref_count;
23 23
24static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event)
26{
27 /* No tracing, just counting, so no obvious leak */
28 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
29 return 0;
30
31 /* Some events are ok to be traced by non-root users... */
32 if (p_event->attach_state == PERF_ATTACH_TASK) {
33 if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
34 return 0;
35 }
36
37 /*
38 * ...otherwise raw tracepoint data can be a severe data leak,
39 * only allow root to have these.
40 */
41 if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
42 return -EPERM;
43
44 return 0;
45}
46
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 47static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 48 struct perf_event *p_event)
26{ 49{
27 struct hlist_head __percpu *list; 50 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 51 int ret;
29 int cpu; 52 int cpu;
30 53
54 ret = perf_trace_event_perm(tp_event, p_event);
55 if (ret)
56 return ret;
57
31 p_event->tp_event = tp_event; 58 p_event->tp_event = tp_event;
32 if (tp_event->perf_refcount++ > 0) 59 if (tp_event->perf_refcount++ > 0)
33 return 0; 60 return 0;
34 61
62 ret = -ENOMEM;
63
35 list = alloc_percpu(struct hlist_head); 64 list = alloc_percpu(struct hlist_head);
36 if (!list) 65 if (!list)
37 goto fail; 66 goto fail;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0725eeab1937..35fde09b81de 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -27,6 +27,12 @@
27 27
28DEFINE_MUTEX(event_mutex); 28DEFINE_MUTEX(event_mutex);
29 29
30DEFINE_MUTEX(event_storage_mutex);
31EXPORT_SYMBOL_GPL(event_storage_mutex);
32
33char event_storage[EVENT_STORAGE_SIZE];
34EXPORT_SYMBOL_GPL(event_storage);
35
30LIST_HEAD(ftrace_events); 36LIST_HEAD(ftrace_events);
31LIST_HEAD(ftrace_common_fields); 37LIST_HEAD(ftrace_common_fields);
32 38
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4ba44deaac25..4b74d71705c0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void) \
83 83
84#undef __array 84#undef __array
85#define __array(type, item, len) \ 85#define __array(type, item, len) \
86 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 86 do { \
87 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 87 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
88 mutex_lock(&event_storage_mutex); \
89 snprintf(event_storage, sizeof(event_storage), \
90 "%s[%d]", #type, len); \
91 ret = trace_define_field(event_call, event_storage, #item, \
88 offsetof(typeof(field), item), \ 92 offsetof(typeof(field), item), \
89 sizeof(field.item), \ 93 sizeof(field.item), \
90 is_signed_type(type), FILTER_OTHER); \ 94 is_signed_type(type), FILTER_OTHER); \
91 if (ret) \ 95 mutex_unlock(&event_storage_mutex); \
92 return ret; 96 if (ret) \
97 return ret; \
98 } while (0);
93 99
94#undef __array_desc 100#undef __array_desc
95#define __array_desc(type, container, item, len) \ 101#define __array_desc(type, container, item, len) \
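The reworked __array() wraps its statements in do { } while (0) so it expands safely as a single statement, and serializes all users of the shared event_storage buffer while the "type[len]" field name is built and registered. The pattern in isolation (register_field() is a hypothetical stand-in for trace_define_field()):

static DEFINE_MUTEX(storage_mutex);
static char storage[128];

static int define_array_field(const char *type, int len)
{
	int ret;

	mutex_lock(&storage_mutex);
	snprintf(storage, sizeof(storage), "%s[%d]", type, len);
	ret = register_field(storage);	/* hypothetical stand-in */
	mutex_unlock(&storage_mutex);
	return ret;
}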
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 155a415b3209..562c56e048fd 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
558static int trace_wakeup_test_thread(void *data) 558static int trace_wakeup_test_thread(void *data)
559{ 559{
560 /* Make this a RT thread, doesn't need to be too high */ 560 /* Make this a RT thread, doesn't need to be too high */
561 struct sched_param param = { .sched_priority = 5 }; 561 static struct sched_param param = { .sched_priority = 5 };
562 struct completion *x = data; 562 struct completion *x = data;
563 563
564 sched_setscheduler(current, SCHED_FIFO, &param); 564 sched_setscheduler(current, SCHED_FIFO, &param);
diff --git a/kernel/user.c b/kernel/user.c
index 2c7d8d5914b1..5c598ca781df 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -158,6 +158,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
158 spin_lock_irq(&uidhash_lock); 158 spin_lock_irq(&uidhash_lock);
159 up = uid_hash_find(uid, hashent); 159 up = uid_hash_find(uid, hashent);
160 if (up) { 160 if (up) {
161 put_user_ns(ns);
161 key_put(new->uid_keyring); 162 key_put(new->uid_keyring);
162 key_put(new->session_keyring); 163 key_put(new->session_keyring);
163 kmem_cache_free(uid_cachep, new); 164 kmem_cache_free(uid_cachep, new);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a4024c..6e7b575ac33c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -57,6 +57,8 @@ static int __init hardlockup_panic_setup(char *str)
57{ 57{
58 if (!strncmp(str, "panic", 5)) 58 if (!strncmp(str, "panic", 5))
59 hardlockup_panic = 1; 59 hardlockup_panic = 1;
60 else if (!strncmp(str, "0", 1))
61 no_watchdog = 1;
60 return 1; 62 return 1;
61} 63}
62__setup("nmi_watchdog=", hardlockup_panic_setup); 64__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -307,7 +309,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
307 */ 309 */
308static int watchdog(void *unused) 310static int watchdog(void *unused)
309{ 311{
310 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 312 static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
311 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 313 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
312 314
313 sched_setscheduler(current, SCHED_FIFO, &param); 315 sched_setscheduler(current, SCHED_FIFO, &param);
@@ -364,7 +366,8 @@ static int watchdog_nmi_enable(int cpu)
364 goto out_save; 366 goto out_save;
365 } 367 }
366 368
367 printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); 369 printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n",
370 cpu, PTR_ERR(event));
368 return PTR_ERR(event); 371 return PTR_ERR(event);
369 372
370 /* success path */ 373 /* success path */
@@ -547,13 +550,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
547 .notifier_call = cpu_callback 550 .notifier_call = cpu_callback
548}; 551};
549 552
550static int __init spawn_watchdog_task(void) 553void __init lockup_detector_init(void)
551{ 554{
552 void *cpu = (void *)(long)smp_processor_id(); 555 void *cpu = (void *)(long)smp_processor_id();
553 int err; 556 int err;
554 557
555 if (no_watchdog) 558 if (no_watchdog)
556 return 0; 559 return;
557 560
558 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 561 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
559 WARN_ON(notifier_to_errno(err)); 562 WARN_ON(notifier_to_errno(err));
@@ -561,6 +564,5 @@ static int __init spawn_watchdog_task(void)
561 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 564 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
562 register_cpu_notifier(&cpu_nfb); 565 register_cpu_notifier(&cpu_nfb);
563 566
564 return 0; 567 return;
565} 568}
566early_initcall(spawn_watchdog_task);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28b42b9274d0..2d05adb98401 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -173,7 +173,8 @@ config LOCKUP_DETECTOR
173 An NMI is generated every 60 seconds or so to check for hardlockups. 173 An NMI is generated every 60 seconds or so to check for hardlockups.
174 174
175config HARDLOCKUP_DETECTOR 175config HARDLOCKUP_DETECTOR
176 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI 176 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
177 !ARCH_HAS_NMI_WATCHDOG
177 178
178config BOOTPARAM_SOFTLOCKUP_PANIC 179config BOOTPARAM_SOFTLOCKUP_PANIC
179 bool "Panic (Reboot) On Soft Lockups" 180 bool "Panic (Reboot) On Soft Lockups"
diff --git a/lib/Makefile b/lib/Makefile
index e6a3763b8212..9e2db72d128e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
8endif 8endif
9 9
10lib-y := ctype.o string.o vsprintf.o cmdline.o \ 10lib-y := ctype.o string.o vsprintf.o cmdline.o \
11 rbtree.o radix-tree.o dump_stack.o \ 11 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
12 idr.o int_sqrt.o extable.o prio_tree.o \ 12 idr.o int_sqrt.o extable.o prio_tree.o \
13 sha1.o irq_regs.o reciprocal_div.o argv_split.o \ 13 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
14 proportions.o prio_heap.o ratelimit.o show_mem.o \ 14 proportions.o prio_heap.o ratelimit.o show_mem.o \
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644
index 000000000000..e3a1050e6820
--- /dev/null
+++ b/lib/timerqueue.c
@@ -0,0 +1,107 @@
1/*
2 * Generic Timer-queue
3 *
4 * Manages a simple queue of timers, ordered by expiration time.
5 * Uses rbtrees for quick list adds and expiration.
6 *
7 * NOTE: All of the following functions need to be serialized
 8 * to avoid races. No locking is done by this library code.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/timerqueue.h>
26#include <linux/rbtree.h>
27#include <linux/module.h>
28
29/**
30 * timerqueue_add - Adds timer to timerqueue.
31 *
32 * @head: head of timerqueue
33 * @node: timer node to be added
34 *
35 * Adds the timer node to the timerqueue, sorted by the
36 * node's expires value.
37 */
38void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
39{
40 struct rb_node **p = &head->head.rb_node;
41 struct rb_node *parent = NULL;
42 struct timerqueue_node *ptr;
43
44 /* Make sure we don't add nodes that are already added */
45 WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
46
47 while (*p) {
48 parent = *p;
49 ptr = rb_entry(parent, struct timerqueue_node, node);
50 if (node->expires.tv64 < ptr->expires.tv64)
51 p = &(*p)->rb_left;
52 else
53 p = &(*p)->rb_right;
54 }
55 rb_link_node(&node->node, parent, p);
56 rb_insert_color(&node->node, &head->head);
57
58 if (!head->next || node->expires.tv64 < head->next->expires.tv64)
59 head->next = node;
60}
61EXPORT_SYMBOL_GPL(timerqueue_add);
62
63/**
64 * timerqueue_del - Removes a timer from the timerqueue.
65 *
66 * @head: head of timerqueue
67 * @node: timer node to be removed
68 *
69 * Removes the timer node from the timerqueue.
70 */
71void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
72{
73 WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
74
75 /* update next pointer */
76 if (head->next == node) {
77 struct rb_node *rbn = rb_next(&node->node);
78
79 head->next = rbn ?
80 rb_entry(rbn, struct timerqueue_node, node) : NULL;
81 }
82 rb_erase(&node->node, &head->head);
83 RB_CLEAR_NODE(&node->node);
84}
85EXPORT_SYMBOL_GPL(timerqueue_del);
86
87/**
88 * timerqueue_iterate_next - Returns the timer after the provided timer
89 *
90 * @node: Pointer to a timer.
91 *
92 * Provides the timer that is after the given node. This is used, when
93 * necessary, to iterate through the list of timers in a timer list
94 * without modifying the list.
95 */
96struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
97{
98 struct rb_node *next;
99
100 if (!node)
101 return NULL;
102 next = rb_next(&node->node);
103 if (!next)
104 return NULL;
105 return container_of(next, struct timerqueue_node, node);
106}
107EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
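A minimal usage sketch for the new library, assuming the caller provides its own serialization as the file header requires; timerqueue_init_head(), timerqueue_init() and timerqueue_getnext() are expected to come from the matching <linux/timerqueue.h>, which is not shown in this diff:

#include <linux/ktime.h>
#include <linux/timerqueue.h>

static struct timerqueue_head head;
static struct timerqueue_node node;

static void timerqueue_example(void)
{
	struct timerqueue_node *next;

	timerqueue_init_head(&head);
	timerqueue_init(&node);
	node.expires = ktime_set(1, 0);		/* expire one second out */
	timerqueue_add(&head, &node);

	next = timerqueue_getnext(&head);	/* earliest-expiring node */
	if (next)
		timerqueue_del(&head, next);
}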
diff --git a/mm/compaction.c b/mm/compaction.c
index 4d709ee59013..1a8894eadf72 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -279,7 +279,6 @@ static unsigned long isolate_migratepages(struct zone *zone,
279 /* Successfully isolated */ 279 /* Successfully isolated */
280 del_page_from_lru_list(zone, page, page_lru(page)); 280 del_page_from_lru_list(zone, page, page_lru(page));
281 list_add(&page->lru, migratelist); 281 list_add(&page->lru, migratelist);
282 mem_cgroup_del_lru(page);
283 cc->nr_migratepages++; 282 cc->nr_migratepages++;
284 283
285 /* Avoid isolating too much */ 284 /* Avoid isolating too much */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7a22b4129211..00bb8a64d028 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1925,19 +1925,18 @@ again:
1925 1925
1926 rcu_read_lock(); 1926 rcu_read_lock();
1927 p = rcu_dereference(mm->owner); 1927 p = rcu_dereference(mm->owner);
1928 VM_BUG_ON(!p);
1929 /* 1928 /*
1930 * because we don't have task_lock(), "p" can exit while 1929 * Because we don't have task_lock(), "p" can exit.
1931 * we're here. In that case, "mem" can point to root 1930 * In that case, "mem" can point to root or p can be NULL with
 1932 * cgroup but never be NULL. (and task_struct itself is freed 1931 * race with swapoff. Then, we have small risk of mis-accounting.
 1933 * by RCU, cgroup itself is RCU safe.) Then, we have small 1932 * But such kind of mis-account by race always happens because
 1934 * risk here to get wrong cgroup. But such kind of mis-account 1933 * we don't have cgroup_mutex(). It's overkill and we allow that
 1935 * by race always happens because we don't have cgroup_mutex(). 1934 * small race, here.
 1936 * It's overkill and we allow that small race, here. 1935 * (*) swapoff et al. will charge against mm-struct not against
1936 * task-struct. So, mm->owner can be NULL.
1937 */ 1937 */
1938 mem = mem_cgroup_from_task(p); 1938 mem = mem_cgroup_from_task(p);
1939 VM_BUG_ON(!mem); 1939 if (!mem || mem_cgroup_is_root(mem)) {
1940 if (mem_cgroup_is_root(mem)) {
1941 rcu_read_unlock(); 1940 rcu_read_unlock();
1942 goto done; 1941 goto done;
1943 } 1942 }
diff --git a/mm/migrate.c b/mm/migrate.c
index fe5a3c6a5426..6ae8a66a7045 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -35,6 +35,8 @@
35#include <linux/hugetlb.h> 35#include <linux/hugetlb.h>
36#include <linux/gfp.h> 36#include <linux/gfp.h>
37 37
38#include <asm/tlbflush.h>
39
38#include "internal.h" 40#include "internal.h"
39 41
40#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) 42#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
diff --git a/mm/nommu.c b/mm/nommu.c
index 27a9ac588516..ef4045d010d5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -10,7 +10,7 @@
10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> 10 * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> 11 * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> 12 * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
13 * Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org> 13 * Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
14 */ 14 */
15 15
16#include <linux/module.h> 16#include <linux/module.h>
@@ -328,6 +328,7 @@ void *vmalloc_node(unsigned long size, int node)
328{ 328{
329 return vmalloc(size); 329 return vmalloc(size);
330} 330}
331EXPORT_SYMBOL(vmalloc_node);
331 332
332/** 333/**
333 * vzalloc_node - allocate memory on a specific node with zero fill 334 * vzalloc_node - allocate memory on a specific node with zero fill
@@ -440,6 +441,31 @@ void __attribute__((weak)) vmalloc_sync_all(void)
440{ 441{
441} 442}
442 443
444/**
445 * alloc_vm_area - allocate a range of kernel address space
446 * @size: size of the area
447 *
448 * Returns: NULL on failure, vm_struct on success
449 *
450 * This function reserves a range of kernel address space, and
451 * allocates pagetables to map that range. No actual mappings
452 * are created. If the kernel address space is not shared
453 * between processes, it syncs the pagetable across all
454 * processes.
455 */
456struct vm_struct *alloc_vm_area(size_t size)
457{
458 BUG();
459 return NULL;
460}
461EXPORT_SYMBOL_GPL(alloc_vm_area);
462
463void free_vm_area(struct vm_struct *area)
464{
465 BUG();
466}
467EXPORT_SYMBOL_GPL(free_vm_area);
468
443int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, 469int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
444 struct page *page) 470 struct page *page)
445{ 471{
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b840afa89761..b4edfe7ce06c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -563,7 +563,7 @@ static void balance_dirty_pages(struct address_space *mapping,
563 break; /* We've done our duty */ 563 break; /* We've done our duty */
564 } 564 }
565 trace_wbc_balance_dirty_wait(&wbc, bdi); 565 trace_wbc_balance_dirty_wait(&wbc, bdi);
566 __set_current_state(TASK_INTERRUPTIBLE); 566 __set_current_state(TASK_UNINTERRUPTIBLE);
567 io_schedule_timeout(pause); 567 io_schedule_timeout(pause);
568 568
569 /* 569 /*
diff --git a/mm/percpu.c b/mm/percpu.c
index efe816856a9d..02ba91230b99 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1268,7 +1268,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1268 1268
1269 /* we're done parsing the input, undefine BUG macro and dump config */ 1269 /* we're done parsing the input, undefine BUG macro and dump config */
1270#undef PCPU_SETUP_BUG_ON 1270#undef PCPU_SETUP_BUG_ON
1271 pcpu_dump_alloc_info(KERN_INFO, ai); 1271 pcpu_dump_alloc_info(KERN_DEBUG, ai);
1272 1272
1273 pcpu_nr_groups = ai->nr_groups; 1273 pcpu_nr_groups = ai->nr_groups;
1274 pcpu_group_offsets = group_offsets; 1274 pcpu_group_offsets = group_offsets;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index fa642aa652bd..432a9a633e8d 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -311,6 +311,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
311 d->state = BT_OPEN; 311 d->state = BT_OPEN;
312 d->flags = 0; 312 d->flags = 0;
313 d->mscex = 0; 313 d->mscex = 0;
314 d->sec_level = BT_SECURITY_LOW;
314 d->mtu = RFCOMM_DEFAULT_MTU; 315 d->mtu = RFCOMM_DEFAULT_MTU;
315 d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV; 316 d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
316 317
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index eb5b256ffc88..543b3262d002 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -437,7 +437,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
437 ip6h = ipv6_hdr(skb); 437 ip6h = ipv6_hdr(skb);
438 438
439 *(__force __be32 *)ip6h = htonl(0x60000000); 439 *(__force __be32 *)ip6h = htonl(0x60000000);
440 ip6h->payload_len = 8 + sizeof(*mldq); 440 ip6h->payload_len = htons(8 + sizeof(*mldq));
441 ip6h->nexthdr = IPPROTO_HOPOPTS; 441 ip6h->nexthdr = IPPROTO_HOPOPTS;
442 ip6h->hop_limit = 1; 442 ip6h->hop_limit = 1;
443 ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); 443 ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
@@ -1430,7 +1430,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1430 struct net_bridge_port *port, 1430 struct net_bridge_port *port,
1431 struct sk_buff *skb) 1431 struct sk_buff *skb)
1432{ 1432{
1433 struct sk_buff *skb2 = skb; 1433 struct sk_buff *skb2;
1434 struct ipv6hdr *ip6h; 1434 struct ipv6hdr *ip6h;
1435 struct icmp6hdr *icmp6h; 1435 struct icmp6hdr *icmp6h;
1436 u8 nexthdr; 1436 u8 nexthdr;
@@ -1469,15 +1469,15 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1469 if (!skb2) 1469 if (!skb2)
1470 return -ENOMEM; 1470 return -ENOMEM;
1471 1471
1472 err = -EINVAL;
1473 if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr)))
1474 goto out;
1475
1472 len -= offset - skb_network_offset(skb2); 1476 len -= offset - skb_network_offset(skb2);
1473 1477
1474 __skb_pull(skb2, offset); 1478 __skb_pull(skb2, offset);
1475 skb_reset_transport_header(skb2); 1479 skb_reset_transport_header(skb2);
1476 1480
1477 err = -EINVAL;
1478 if (!pskb_may_pull(skb2, sizeof(*icmp6h)))
1479 goto out;
1480
1481 icmp6h = icmp6_hdr(skb2); 1481 icmp6h = icmp6_hdr(skb2);
1482 1482
1483 switch (icmp6h->icmp6_type) { 1483 switch (icmp6h->icmp6_type) {
@@ -1516,7 +1516,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1516 switch (icmp6h->icmp6_type) { 1516 switch (icmp6h->icmp6_type) {
1517 case ICMPV6_MGM_REPORT: 1517 case ICMPV6_MGM_REPORT:
1518 { 1518 {
1519 struct mld_msg *mld = (struct mld_msg *)icmp6h; 1519 struct mld_msg *mld;
1520 if (!pskb_may_pull(skb2, sizeof(*mld))) {
1521 err = -EINVAL;
1522 goto out;
1523 }
1524 mld = (struct mld_msg *)skb_transport_header(skb2);
1520 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; 1525 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1;
1521 err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); 1526 err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
1522 break; 1527 break;
@@ -1529,15 +1534,18 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1529 break; 1534 break;
1530 case ICMPV6_MGM_REDUCTION: 1535 case ICMPV6_MGM_REDUCTION:
1531 { 1536 {
1532 struct mld_msg *mld = (struct mld_msg *)icmp6h; 1537 struct mld_msg *mld;
1538 if (!pskb_may_pull(skb2, sizeof(*mld))) {
1539 err = -EINVAL;
1540 goto out;
1541 }
1542 mld = (struct mld_msg *)skb_transport_header(skb2);
1533 br_ip6_multicast_leave_group(br, port, &mld->mld_mca); 1543 br_ip6_multicast_leave_group(br, port, &mld->mld_mca);
1534 } 1544 }
1535 } 1545 }
1536 1546
1537out: 1547out:
1538 __skb_push(skb2, offset); 1548 kfree_skb(skb2);
1539 if (skb2 != skb)
1540 kfree_skb(skb2);
1541 return err; 1549 return err;
1542} 1550}
1543#endif 1551#endif
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 35cf27087b56..e3d7aefa9181 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -50,6 +50,8 @@ static void br_send_bpdu(struct net_bridge_port *p,
50 50
51 llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); 51 llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr);
52 52
53 skb_reset_mac_header(skb);
54
53 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, 55 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
54 dev_queue_xmit); 56 dev_queue_xmit);
55} 57}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 6faa8256e10c..9d5e8accfab1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
125 struct list_head tx_ops; 125 struct list_head tx_ops;
126 unsigned long dropped_usr_msgs; 126 unsigned long dropped_usr_msgs;
127 struct proc_dir_entry *bcm_proc_read; 127 struct proc_dir_entry *bcm_proc_read;
128 char procname [20]; /* pointer printed in ASCII with \0 */ 128 char procname [32]; /* inode number in decimal with \0 */
129}; 129};
130 130
131static inline struct bcm_sock *bcm_sk(const struct sock *sk) 131static inline struct bcm_sock *bcm_sk(const struct sock *sk)
@@ -1521,7 +1521,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
1521 1521
1522 if (proc_dir) { 1522 if (proc_dir) {
1523 /* unique socket address as filename */ 1523 /* unique socket address as filename */
1524 sprintf(bo->procname, "%p", sock); 1524 sprintf(bo->procname, "%lu", sock_i_ino(sk));
1525 bo->bcm_proc_read = proc_create_data(bo->procname, 0644, 1525 bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
1526 proc_dir, 1526 proc_dir,
1527 &bcm_proc_fops, sk); 1527 &bcm_proc_fops, sk);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 82a4369ae150..a20e5d3bbfa0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -181,8 +181,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
181{ 181{
182 int ret = 0; 182 int ret = 0;
183 183
184 if (rule->iifindex && (rule->iifindex != fl->iif) && 184 if (rule->iifindex && (rule->iifindex != fl->iif))
185 !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
186 goto out; 185 goto out;
187 186
188 if (rule->oifindex && (rule->oifindex != fl->oif)) 187 if (rule->oifindex && (rule->oifindex != fl->oif))
diff --git a/net/core/sock.c b/net/core/sock.c
index fb6080111461..e5af8d5d5b50 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1009,6 +1009,36 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
1009#endif 1009#endif
1010} 1010}
1011 1011
1012/*
1013 * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
1014 * un-modified. Special care is taken when initializing object to zero.
1015 */
1016static inline void sk_prot_clear_nulls(struct sock *sk, int size)
1017{
1018 if (offsetof(struct sock, sk_node.next) != 0)
1019 memset(sk, 0, offsetof(struct sock, sk_node.next));
1020 memset(&sk->sk_node.pprev, 0,
1021 size - offsetof(struct sock, sk_node.pprev));
1022}
1023
1024void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1025{
1026 unsigned long nulls1, nulls2;
1027
1028 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1029 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1030 if (nulls1 > nulls2)
1031 swap(nulls1, nulls2);
1032
1033 if (nulls1 != 0)
1034 memset((char *)sk, 0, nulls1);
1035 memset((char *)sk + nulls1 + sizeof(void *), 0,
1036 nulls2 - nulls1 - sizeof(void *));
1037 memset((char *)sk + nulls2 + sizeof(void *), 0,
1038 size - nulls2 - sizeof(void *));
1039}
1040EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1041
1012static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, 1042static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1013 int family) 1043 int family)
1014{ 1044{
@@ -1021,19 +1051,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1021 if (!sk) 1051 if (!sk)
1022 return sk; 1052 return sk;
1023 if (priority & __GFP_ZERO) { 1053 if (priority & __GFP_ZERO) {
1024 /* 1054 if (prot->clear_sk)
1025 * caches using SLAB_DESTROY_BY_RCU should let 1055 prot->clear_sk(sk, prot->obj_size);
1026 * sk_node.next un-modified. Special care is taken 1056 else
1027 * when initializing object to zero. 1057 sk_prot_clear_nulls(sk, prot->obj_size);
1028 */
1029 if (offsetof(struct sock, sk_node.next) != 0)
1030 memset(sk, 0, offsetof(struct sock, sk_node.next));
1031 memset(&sk->sk_node.pprev, 0,
1032 prot->obj_size - offsetof(struct sock,
1033 sk_node.pprev));
1034 } 1058 }
1035 } 1059 } else
1036 else
1037 sk = kmalloc(prot->obj_size, priority); 1060 sk = kmalloc(prot->obj_size, priority);
1038 1061
1039 if (sk != NULL) { 1062 if (sk != NULL) {
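Both clear routines preserve the same invariant: under SLAB_DESTROY_BY_RCU a lockless lookup can race with free-and-reuse of a socket, so the nulls .next pointers must survive the zeroing. For the portaddr variant, assuming nulls1 < nulls2 after the swap() and writing P for sizeof(void *), the regions touched are:

/*
 *   [0, nulls1)              zeroed
 *   [nulls1, nulls1 + P)     skc_node.next          left intact
 *   [nulls1 + P, nulls2)     zeroed
 *   [nulls2, nulls2 + P)     skc_portaddr_node.next left intact
 *   [nulls2 + P, obj_size)   zeroed
 */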
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb6f69a8f27a..c19c1f739fba 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -163,13 +163,19 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 				.daddr = addr
 			}
 		},
-		.flags = FLOWI_FLAG_MATCH_ANY_IIF
 	};
 	struct fib_result res = { 0 };
 	struct net_device *dev = NULL;
+	struct fib_table *local_table;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	res.r = NULL;
+#endif
 
 	rcu_read_lock();
-	if (fib_lookup(net, &fl, &res)) {
+	local_table = fib_get_table(net, RT_TABLE_LOCAL);
+	if (!local_table ||
+	    fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
 		rcu_read_unlock();
 		return NULL;
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 987bf9adb318..93bfd95584f4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2585,9 +2585,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 		goto out;
 
 	/* RACE: Check return value of inet_select_addr instead. */
-	if (rcu_dereference(dev_out->ip_ptr) == NULL)
-		goto out;	/* Wrong error code */
-
+	if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
+		err = -ENETUNREACH;
+		goto out;
+	}
 	if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
 	    ipv4_is_lbcast(oldflp->fl4_dst)) {
 		if (!fl.fl4_src)
@@ -2648,8 +2649,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 	}
 
 	if (res.type == RTN_LOCAL) {
-		if (!fl.fl4_src)
-			fl.fl4_src = fl.fl4_dst;
+		if (!fl.fl4_src) {
+			if (res.fi->fib_prefsrc)
+				fl.fl4_src = res.fi->fib_prefsrc;
+			else
+				fl.fl4_src = fl.fl4_dst;
+		}
 		dev_out = net->loopback_dev;
 		fl.oif = dev_out->ifindex;
 		res.fi = NULL;
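
The RTN_LOCAL hunk above now honours a route's configured preferred source address (fib_prefsrc) and only falls back to looping the destination back as the source. Reduced to a plain selection helper with scalar stand-ins for the flow and fib structures:

	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t be32;	/* stand-in for __be32 */

	/* Source-address choice from the hunk above: an explicit prefsrc
	 * wins; otherwise the destination is reused for local delivery. */
	static be32 pick_saddr(be32 cur_src, be32 prefsrc, be32 dst)
	{
		if (cur_src)
			return cur_src;		/* caller already chose one */
		return prefsrc ? prefsrc : dst;
	}

	int main(void)
	{
		printf("%u\n", (unsigned)pick_saddr(0, 0x0100007f, 0x0200007f));
		return 0;
	}
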
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e13da6de1fc7..d978bb2f748b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2030,7 +2030,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 get_req:
 			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
 		}
-		sk = sk_next(st->syn_wait_sk);
+		sk = sk_nulls_next(st->syn_wait_sk);
 		st->state = TCP_SEQ_STATE_LISTENING;
 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 	} else {
@@ -2039,7 +2039,7 @@ get_req:
 			if (reqsk_queue_len(&icsk->icsk_accept_queue))
 				goto start_req;
 			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-			sk = sk_next(sk);
+			sk = sk_nulls_next(sk);
 		}
 get_sk:
 	sk_nulls_for_each_from(sk, node) {
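
sk_next() walks a plain hlist, but the hash chains iterated here are nulls lists, whose terminator is an encoded marker value rather than NULL; following it with the plain helper misreads the end of a chain. A toy model of the marker encoding (illustrative names, not the kernel's hlist_nulls API):

	#include <stdint.h>
	#include <stdio.h>

	/* The low bit of a "pointer" marks an end-of-list value that carries
	 * data (e.g. a hash bucket id) instead of being NULL. */
	struct nulls_node {
		struct nulls_node *next;
	};

	static int is_a_nulls(const struct nulls_node *p)
	{
		return ((uintptr_t)p & 1) != 0;
	}

	static struct nulls_node *make_nulls(unsigned long bucket)
	{
		return (struct nulls_node *)((bucket << 1) | 1);
	}

	int main(void)
	{
		struct nulls_node b = { .next = make_nulls(7) };
		struct nulls_node a = { .next = &b };

		for (struct nulls_node *p = &a; !is_a_nulls(p); p = p->next)
			printf("node %p\n", (void *)p);
		/* Treating the marker as a real pointer, as a plain-next
		 * walker would, dereferences the encoded value and crashes. */
		return 0;
	}
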
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5e0a3a582a59..2d3ded4d0786 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1899,6 +1899,7 @@ struct proto udp_prot = {
 	.compat_setsockopt = compat_udp_setsockopt,
 	.compat_getsockopt = compat_udp_getsockopt,
 #endif
+	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udp_prot);
 
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index ab76aa928fa9..aee9963f7f5a 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -57,6 +57,7 @@ struct proto udplite_prot = {
 	.compat_setsockopt = compat_udp_setsockopt,
 	.compat_getsockopt = compat_udp_getsockopt,
 #endif
+	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udplite_prot);
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 93b7a933a775..848b35591042 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2669,7 +2669,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	ASSERT_RTNL();
 
-	rt6_ifdown(net, dev);
+	/* Flush routes if device is being removed or it is not loopback */
+	if (how || !(dev->flags & IFF_LOOPBACK))
+		rt6_ifdown(net, dev);
 	neigh_ifdown(&nd_tbl, dev);
 
 	idev = __in6_dev_get(dev);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 99157b4cd56e..94b5bf132b2e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,7 +56,7 @@
 #include <net/checksum.h>
 #include <linux/mroute6.h>
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
 int __ip6_local_out(struct sk_buff *skb)
 {
@@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb)
 		return -EINVAL;
 }
 
-static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
-{
-	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
-
-	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
-	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
-}
-
 static int ip6_finish_output(struct sk_buff *skb)
 {
 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
@@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 	return offset;
 }
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct sk_buff *frag;
 	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 96455ffb76fb..7659d6f16e6b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1565,11 +1565,16 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
 {
 	struct rt6_info *rt, *nrt;
 	int allfrag = 0;
-
+again:
 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
 	if (rt == NULL)
 		return;
 
+	if (rt6_check_expired(rt)) {
+		ip6_del_rt(rt);
+		goto again;
+	}
+
 	if (pmtu >= dst_mtu(&rt->dst))
 		goto out;
 
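
The again: loop above retires an expired cached route before retrying the lookup, instead of updating the MTU on a dying entry. The same lookup-delete-retry shape over a toy cache (all names illustrative):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	struct entry {
		int key;
		bool expired;
		struct entry *next;
	};

	static struct entry *cache;

	static struct entry *lookup(int key)
	{
		for (struct entry *e = cache; e; e = e->next)
			if (e->key == key)
				return e;
		return NULL;
	}

	static void del_entry(struct entry *victim)
	{
		for (struct entry **p = &cache; *p; p = &(*p)->next)
			if (*p == victim) {
				*p = victim->next;
				return;
			}
	}

	/* Mirrors the rt6_check_expired() retry: never touch a stale entry,
	 * drop it and look the key up again instead. */
	static struct entry *lookup_live(int key)
	{
		struct entry *e;
	again:
		e = lookup(key);
		if (!e)
			return NULL;
		if (e->expired) {
			del_entry(e);
			goto again;
		}
		return e;
	}

	int main(void)
	{
		struct entry stale = { 1, true, NULL };
		struct entry fresh = { 1, false, NULL };

		stale.next = &fresh;
		cache = &stale;
		printf("%s\n", lookup_live(1) == &fresh ? "fresh" : "stale");
		return 0;
	}
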
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 91def93bec85..cd6cb7c3e563 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1477,6 +1477,7 @@ struct proto udpv6_prot = {
 	.compat_setsockopt = compat_udpv6_setsockopt,
 	.compat_getsockopt = compat_udpv6_getsockopt,
 #endif
+	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 
 static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5f48fadc27f7..986c4de5292e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -55,6 +55,7 @@ struct proto udplitev6_prot = {
 	.compat_setsockopt = compat_udpv6_setsockopt,
 	.compat_getsockopt = compat_udpv6_getsockopt,
 #endif
+	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 
 static struct inet_protosw udplite6_protosw = {
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6434bd5ce088..8e688b3de9ab 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -17,6 +17,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/dst.h>
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/xfrm.h>
 
 int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
@@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb)
 	return xfrm_output(skb);
 }
 
+static int __xfrm6_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
+
+	if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
+	    ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+	     dst_allfrag(skb_dst(skb)))) {
+		return ip6_fragment(skb, xfrm6_output_finish);
+	}
+	return xfrm6_output_finish(skb);
+}
+
 int xfrm6_output(struct sk_buff *skb)
 {
 	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
-		       skb_dst(skb)->dev, xfrm6_output_finish);
+		       skb_dst(skb)->dev, __xfrm6_output);
 }
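
__xfrm6_output() only hands the packet to ip6_fragment() when the state is in tunnel mode and the packet either exceeds the path MTU without GSO or the destination requires all-fragmentation. The predicate, reduced to plain booleans (the field names below are stand-ins, not kernel API):

	#include <stdbool.h>
	#include <stdio.h>

	struct pkt {
		bool tunnel_mode;	/* x->props.mode == XFRM_MODE_TUNNEL */
		bool gso;		/* skb_is_gso(skb) */
		bool allfrag;		/* dst_allfrag(skb_dst(skb)) */
		unsigned int len;	/* skb->len */
		unsigned int mtu;	/* ip6_skb_dst_mtu(skb) */
	};

	static bool needs_ip6_fragment(const struct pkt *p)
	{
		return p->tunnel_mode &&
		       ((p->len > p->mtu && !p->gso) || p->allfrag);
	}

	int main(void)
	{
		struct pkt p = { .tunnel_mode = true, .len = 1500, .mtu = 1280 };
		printf("fragment: %d\n", needs_ip6_fragment(&p));	/* 1 */
		return 0;
	}
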
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index a6de3059746d..c9890e25cd4c 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2280,6 +2280,16 @@ static int irda_getsockopt(struct socket *sock, int level, int optname,
 
 	switch (optname) {
 	case IRLMP_ENUMDEVICES:
+
+		/* Offset to first device entry */
+		offset = sizeof(struct irda_device_list) -
+			sizeof(struct irda_device_info);
+
+		if (len < offset) {
+			err = -EINVAL;
+			goto out;
+		}
+
 		/* Ask lmp for the current discovery log */
 		discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
 						    self->nslots);
@@ -2290,15 +2300,9 @@ static int irda_getsockopt(struct socket *sock, int level, int optname,
 		}
 
 		/* Write total list length back to client */
-		if (copy_to_user(optval, &list,
-				 sizeof(struct irda_device_list) -
-				 sizeof(struct irda_device_info)))
+		if (copy_to_user(optval, &list, offset))
 			err = -EFAULT;
 
-		/* Offset to first device entry */
-		offset = sizeof(struct irda_device_list) -
-			sizeof(struct irda_device_info);
-
 		/* Copy the list itself - watch for overflow */
 		if (list.len > 2048) {
 			err = -EINVAL;
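
The reordering above computes the fixed header size first and rejects undersized user buffers before any copy happens. A minimal analogue with memcpy() standing in for copy_to_user() and illustrative types:

	#include <errno.h>
	#include <stddef.h>
	#include <string.h>

	struct item { int id; };
	struct device_list {
		unsigned int len;	/* header: number of entries */
		struct item dev[1];	/* first entry, as in the irda layout */
	};

	/* Copy only the fixed header into a caller buffer of size len,
	 * refusing buffers that cannot hold it -- the check the hunk above
	 * performs before its copy_to_user(). */
	static int get_header(void *dst, size_t len, const struct device_list *src)
	{
		size_t offset = sizeof(struct device_list) - sizeof(struct item);

		if (len < offset)
			return -EINVAL;
		memcpy(dst, src, offset);
		return 0;
	}

	int main(void)
	{
		struct device_list l = { .len = 0 };
		char small[2], big[64];

		return get_header(small, sizeof(small), &l) == -EINVAL &&
		       get_header(big, sizeof(big), &l) == 0 ? 0 : 1;
	}
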
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 239c4836a946..077a93dd1671 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -780,6 +780,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 
 	mutex_lock(&sdata->u.ibss.mtx);
 
+	if (!sdata->u.ibss.ssid_len)
+		goto mgmt_out; /* not ready to merge yet */
+
 	switch (fc & IEEE80211_FCTL_STYPE) {
 	case IEEE80211_STYPE_PROBE_REQ:
 		ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len);
@@ -797,6 +800,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		break;
 	}
 
+ mgmt_out:
 	mutex_unlock(&sdata->u.ibss.mtx);
 }
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 54fb4a0e76f0..b01e467b76c6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1788,9 +1788,11 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 
 	fwd_skb = skb_copy(skb, GFP_ATOMIC);
 
-	if (!fwd_skb && net_ratelimit())
+	if (!fwd_skb && net_ratelimit()) {
 		printk(KERN_DEBUG "%s: failed to clone mesh frame\n",
 				   sdata->name);
+		goto out;
+	}
 
 	fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data;
 	memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
@@ -1828,6 +1830,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 		}
 	}
 
+ out:
 	if (is_multicast_ether_addr(hdr->addr1) ||
 	    sdata->dev->flags & IFF_PROMISC)
 		return RX_CONTINUE;
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index ae344d1ba056..146097cb43a7 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -1051,11 +1051,13 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_work *wk;
+	bool cleanup = false;
 
 	mutex_lock(&local->mtx);
 	list_for_each_entry(wk, &local->work_list, list) {
 		if (wk->sdata != sdata)
 			continue;
+		cleanup = true;
 		wk->type = IEEE80211_WORK_ABORT;
 		wk->started = true;
 		wk->timeout = jiffies;
@@ -1063,7 +1065,8 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
 	mutex_unlock(&local->mtx);
 
 	/* run cleanups etc. */
-	ieee80211_work_work(&local->work_work);
+	if (cleanup)
+		ieee80211_work_work(&local->work_work);
 
 	mutex_lock(&local->mtx);
 	list_for_each_entry(wk, &local->work_list, list) {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3cf478d012dd..7150705f1d0b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -270,7 +270,6 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
 	d = q->next[q->tail];
 	q->next[q->tail] = q->next[d];
-	q->allot[q->next[d]] += q->quantum;
 	skb = q->qs[d].prev;
 	len = qdisc_pkt_len(skb);
 	__skb_unlink(skb, &q->qs[d]);
@@ -321,14 +320,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
 		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->tail = x;
 			q->next[x] = x;
-			q->allot[x] = q->quantum;
 		} else {
 			q->next[x] = q->next[q->tail];
 			q->next[q->tail] = x;
-			q->tail = x;
 		}
+		q->tail = x;
+		q->allot[x] = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -359,13 +357,13 @@ sfq_dequeue(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	sfq_index a, old_a;
+	sfq_index a, next_a;
 
 	/* No active slots */
 	if (q->tail == SFQ_DEPTH)
 		return NULL;
 
-	a = old_a = q->next[q->tail];
+	a = q->next[q->tail];
 
 	/* Grab packet */
 	skb = __skb_dequeue(&q->qs[a]);
@@ -376,17 +374,15 @@ sfq_dequeue(struct Qdisc *sch)
 	/* Is the slot empty? */
 	if (q->qs[a].qlen == 0) {
 		q->ht[q->hash[a]] = SFQ_DEPTH;
-		a = q->next[a];
-		if (a == old_a) {
+		next_a = q->next[a];
+		if (a == next_a) {
 			q->tail = SFQ_DEPTH;
 			return skb;
 		}
-		q->next[q->tail] = a;
-		q->allot[a] += q->quantum;
+		q->next[q->tail] = next_a;
 	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = a;
-		a = q->next[a];
 		q->allot[a] += q->quantum;
+		q->tail = a;
 	}
 	return skb;
 }
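
SFQ keeps its active slots on a singly linked circular list in which q->next[q->tail] is the head; the enqueue hunk merely hoists the tail and allot updates that both branches shared. A toy version of the two insertion cases:

	#include <stdio.h>

	#define DEPTH 8	/* stands in for SFQ_DEPTH, which doubles as "empty" */

	static int next[DEPTH + 1];
	static int tail = DEPTH;	/* DEPTH means "no active slots" */

	/* Insert slot x behind the tail of the circular active list. */
	static void activate(int x)
	{
		if (tail == DEPTH)	/* first flow: self-cycle */
			next[x] = x;
		else {			/* splice behind current tail */
			next[x] = next[tail];
			next[tail] = x;
		}
		tail = x;		/* hoisted out of both branches */
	}

	int main(void)
	{
		activate(2);
		activate(5);
		/* walk one full cycle starting at the head next[tail] */
		int a = next[tail];
		do {
			printf("slot %d\n", a);
			a = next[a];
		} while (a != next[tail]);
		return 0;
	}
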
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0b9ee34ad35c..fff0926b1111 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5053,7 +5053,7 @@ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
 	if (copy_to_user(optval, &val, len))
 		return -EFAULT;
 
-	return -ENOTSUPP;
+	return 0;
 }
 
 /*
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 5ad25e17b6cb..4eb99ab34053 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -214,17 +214,22 @@ ifdef BUILD_C_RECORDMCOUNT
 # The empty.o file is created in the make process in order to determine
 # the target endianness and word size. It is made before all other C
 # files, including recordmcount.
-cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then	\
-			$(objtree)/scripts/recordmcount "$(@)";	\
-		    fi;
+sub_cmd_record_mcount =					\
+	if [ $(@) != "scripts/mod/empty.o" ]; then	\
+		$(objtree)/scripts/recordmcount "$(@)";	\
+	fi;
 else
-cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
 	"$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
 	"$(if $(CONFIG_64BIT),64,32)" \
 	"$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \
 	"$(LD)" "$(NM)" "$(RM)" "$(MV)" \
 	"$(if $(part-of-module),1,0)" "$(@)";
 endif
+cmd_record_mcount =						\
+	if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then	\
+		$(sub_cmd_record_mcount)			\
+	fi;
 endif
 
 define rule_cc_o_c
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index b9d9aa18e6d6..5f77dcb8977e 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -140,6 +140,20 @@ struct property *menu_add_prop(enum prop_type type, char *prompt, struct expr *e
 	}
 	if (current_entry->prompt && current_entry != &rootmenu)
 		prop_warn(prop, "prompt redefined");
+
+	/* Apply all upper menus' visibilities to actual prompts. */
+	if(type == P_PROMPT) {
+		struct menu *menu = current_entry;
+
+		while ((menu = menu->parent) != NULL) {
+			if (!menu->visibility)
+				continue;
+			prop->visible.expr
+				= expr_alloc_and(prop->visible.expr,
+						 menu->visibility);
+		}
+	}
+
 		current_entry->prompt = prop;
 	}
 	prop->text = prompt;
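
The new block walks every ancestor menu and ANDs its visibility expression into the prompt's own. The same parent walk, with booleans standing in for kconfig expression trees:

	#include <stdbool.h>
	#include <stdio.h>

	struct menu {
		struct menu *parent;
		bool has_visibility;
		bool visible;
	};

	/* A prompt is visible only when every ancestor menu that carries a
	 * visibility condition allows it. */
	static bool prompt_visible(const struct menu *m, bool own)
	{
		for (m = m->parent; m != NULL; m = m->parent) {
			if (!m->has_visibility)
				continue;	/* no condition: no constraint */
			own = own && m->visible;	/* expr_alloc_and() analogue */
		}
		return own;
	}

	int main(void)
	{
		struct menu root = { NULL, false, true };
		struct menu mid = { &root, true, false };
		struct menu leaf = { &mid, false, true };

		printf("%d\n", prompt_visible(&leaf, true));	/* 0: hidden by mid */
		return 0;
	}
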
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 39580a5dc5df..9f85012acf0d 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -155,6 +155,8 @@ use strict;
 # '@parameter' - name of a parameter
 # '%CONST' - name of a constant.
 
+## init lots of data
+
 my $errors = 0;
 my $warnings = 0;
 my $anon_struct_union = 0;
@@ -218,21 +220,14 @@ my %highlights_list = ( $type_constant, "\$1",
 		      $type_param, "\$1" );
 my $blankline_list = "";
 
-sub usage {
-    print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n";
-    print "         [ -no-doc-sections ]\n";
-    print "         [ -function funcname [ -function funcname ...] ]\n";
-    print "         [ -nofunction funcname [ -nofunction funcname ...] ]\n";
-    print "         c source file(s) > outputfile\n";
-    print "         -v : verbose output, more warnings & other info listed\n";
-    exit 1;
-}
-
 # read arguments
 if ($#ARGV == -1) {
     usage();
 }
 
+my $kernelversion;
+my $dohighlight = "";
+
 my $verbose = 0;
 my $output_mode = "man";
 my $no_doc_sections = 0;
@@ -245,7 +240,7 @@ my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
 		'November', 'December')[(localtime)[4]] .
 	      " " . ((localtime)[5]+1900);
 
-# Essentially these are globals
+# Essentially these are globals.
 # They probably want to be tidied up, made more localised or something.
 # CAVEAT EMPTOR!  Some of the others I localised may not want to be, which
 # could cause "use of undefined value" or other bugs.
@@ -353,6 +348,18 @@ while ($ARGV[0] =~ m/^-(.*)/) {
     }
 }
 
+# continue execution near EOF;
+
+sub usage {
+    print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n";
+    print "         [ -no-doc-sections ]\n";
+    print "         [ -function funcname [ -function funcname ...] ]\n";
+    print "         [ -nofunction funcname [ -nofunction funcname ...] ]\n";
+    print "         c source file(s) > outputfile\n";
+    print "         -v : verbose output, more warnings & other info listed\n";
+    exit 1;
+}
+
 # get kernel version from env
 sub get_kernel_version() {
     my $version = 'unknown kernel version';
@@ -362,15 +369,6 @@ sub get_kernel_version() {
     }
     return $version;
 }
-my $kernelversion = get_kernel_version();
-
-# generate a sequence of code that will splice in highlighting information
-# using the s// operator.
-my $dohighlight = "";
-foreach my $pattern (keys %highlights) {
-#   print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n";
-    $dohighlight .=  "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n";
-}
 
 ##
 # dumps section contents to arrays/hashes intended for that purpose.
@@ -1851,34 +1849,6 @@ sub dump_function($$) {
 	});
 }
 
-sub process_file($);
-
-# Read the file that maps relative names to absolute names for
-# separate source and object directories and for shadow trees.
-if (open(SOURCE_MAP, "<.tmp_filelist.txt")) {
-	my ($relname, $absname);
-	while(<SOURCE_MAP>) {
-		chop();
-		($relname, $absname) = (split())[0..1];
-		$relname =~ s:^/+::;
-		$source_map{$relname} = $absname;
-	}
-	close(SOURCE_MAP);
-}
-
-foreach (@ARGV) {
-    chomp;
-    process_file($_);
-}
-if ($verbose && $errors) {
-  print STDERR "$errors errors\n";
-}
-if ($verbose && $warnings) {
-  print STDERR "$warnings warnings\n";
-}
-
-exit($errors);
-
 sub reset_state {
     $function = "";
     %constants = ();
@@ -2285,3 +2255,39 @@ sub process_file($) {
 	    }
 	}
 }
+
+
+$kernelversion = get_kernel_version();
+
+# generate a sequence of code that will splice in highlighting information
+# using the s// operator.
+foreach my $pattern (keys %highlights) {
+#   print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n";
+    $dohighlight .=  "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n";
+}
+
+# Read the file that maps relative names to absolute names for
+# separate source and object directories and for shadow trees.
+if (open(SOURCE_MAP, "<.tmp_filelist.txt")) {
+	my ($relname, $absname);
+	while(<SOURCE_MAP>) {
+		chop();
+		($relname, $absname) = (split())[0..1];
+		$relname =~ s:^/+::;
+		$source_map{$relname} = $absname;
+	}
+	close(SOURCE_MAP);
+}
+
+foreach (@ARGV) {
+    chomp;
+    process_file($_);
+}
+if ($verbose && $errors) {
+  print STDERR "$errors errors\n";
+}
+if ($verbose && $warnings) {
+  print STDERR "$warnings warnings\n";
+}
+
+exit($errors);
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index aef8c0a923ab..d661afbe474c 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -253,6 +253,8 @@ static int ima_lsm_rule_init(struct ima_measure_rule_entry *entry,
 	result = security_filter_rule_init(entry->lsm[lsm_rule].type,
 					   Audit_equal, args,
 					   &entry->lsm[lsm_rule].rule);
+	if (!entry->lsm[lsm_rule].rule)
+		return -EINVAL;
 	return result;
 }
 
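
The added check covers an init helper that can report success while leaving its out-pointer NULL (which security_filter_rule_init() may do when the backing LSM provides no rule). A reduced sketch with a fake initializer:

	#include <errno.h>
	#include <stddef.h>

	/* make_rule() fakes an initializer that returns 0 "success" even
	 * when it allocates nothing; the caller must test the pointer. */
	static int make_rule(int enabled, void **out)
	{
		*out = NULL;
		if (enabled)
			*out = (void *)0x1;	/* pretend allocation */
		return 0;
	}

	static int init_rule(int enabled)
	{
		void *rule;
		int result = make_rule(enabled, &rule);

		if (!rule)
			return -EINVAL;	/* success code alone is not enough */
		return result;
	}

	int main(void)
	{
		return init_rule(0) == -EINVAL && init_rule(1) == 0 ? 0 : 1;
	}
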
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 0088dd8bf68a..0ea52d25a6bd 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -403,7 +403,6 @@ link_check_failed:
 	return ret;
 
 link_prealloc_failed:
-	up_write(&dest_keyring->sem);
 	mutex_unlock(&user->cons_lock);
 	kleave(" = %d [prelink]", ret);
 	return ret;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index b75db8e9cc0f..11446a1506da 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1070,8 +1070,10 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
 		struct snd_pcm_hw_rule *new;
 		unsigned int new_rules = constrs->rules_all + 16;
 		new = kcalloc(new_rules, sizeof(*c), GFP_KERNEL);
-		if (!new)
+		if (!new) {
+			va_end(args);
 			return -ENOMEM;
+		}
 		if (constrs->rules) {
 			memcpy(new, constrs->rules,
 			       constrs->rules_num * sizeof(*c));
@@ -1087,8 +1089,10 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
 	c->private = private;
 	k = 0;
 	while (1) {
-		if (snd_BUG_ON(k >= ARRAY_SIZE(c->deps)))
+		if (snd_BUG_ON(k >= ARRAY_SIZE(c->deps))) {
+			va_end(args);
 			return -EINVAL;
+		}
 		c->deps[k++] = dep;
 		if (dep < 0)
 			break;
@@ -1097,7 +1101,7 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
 	constrs->rules_num++;
 	va_end(args);
 	return 0;
 }
 
 EXPORT_SYMBOL(snd_pcm_hw_rule_add);
 
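
Both early returns above now release the va_list first: C requires every va_start() to be paired with va_end() on each exit path, error paths included. Illustrated with a made-up varargs helper:

	#include <errno.h>
	#include <stdarg.h>

	/* Sum ints until a negative sentinel; bail out if more than
	 * max_terms are consumed.  Every return runs va_end(). */
	static int sum_until_negative(int max_terms, ...)
	{
		va_list args;
		int sum = 0;

		va_start(args, max_terms);
		for (int k = 0; ; k++) {
			if (k >= max_terms) {
				va_end(args);	/* error path cleans up too */
				return -EINVAL;
			}
			int v = va_arg(args, int);
			if (v < 0)
				break;
			sum += v;
		}
		va_end(args);
		return sum;
	}

	int main(void)
	{
		return sum_until_negative(8, 1, 2, 3, -1) == 6 ? 0 : 1;
	}
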
diff --git a/sound/oss/soundcard.c b/sound/oss/soundcard.c
index 46c0d03dbecc..fcb14a099822 100644
--- a/sound/oss/soundcard.c
+++ b/sound/oss/soundcard.c
@@ -87,7 +87,7 @@ int *load_mixer_volumes(char *name, int *levels, int present)
 	int i, n;
 
 	for (i = 0; i < num_mixer_volumes; i++) {
-		if (strcmp(name, mixer_vols[i].name) == 0) {
+		if (strncmp(name, mixer_vols[i].name, 32) == 0) {
 			if (present)
 				mixer_vols[i].num = i;
 			return mixer_vols[i].levels;
@@ -99,7 +99,7 @@ int *load_mixer_volumes(char *name, int *levels, int present)
 	}
 	n = num_mixer_volumes++;
 
-	strcpy(mixer_vols[n].name, name);
+	strncpy(mixer_vols[n].name, name, 32);
 
 	if (present)
 		mixer_vols[n].num = n;
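
The fix bounds both the comparison and the copy to the 32-byte name field. Note that strncpy() leaves the field unterminated when the source fills all 32 bytes, which is exactly why the lookup must use strncmp() with the same bound. A self-contained sketch (NAME_LEN and the table are assumptions, not the driver's types):

	#include <stdio.h>
	#include <string.h>

	#define NAME_LEN 32	/* size of the fixed name field */

	struct vol { char name[NAME_LEN]; int num; };

	/* Bounded lookup-or-insert over a fixed-size, possibly
	 * unterminated name field. */
	static int find_or_add(struct vol *tab, int *count, const char *name)
	{
		for (int i = 0; i < *count; i++)
			if (strncmp(name, tab[i].name, NAME_LEN) == 0)
				return i;

		int n = (*count)++;
		strncpy(tab[n].name, name, NAME_LEN);
		return n;
	}

	int main(void)
	{
		struct vol tab[4];
		int count = 0;

		printf("%d\n", find_or_add(tab, &count, "Master"));	/* 0 */
		printf("%d\n", find_or_add(tab, &count, "Master"));	/* 0 again */
		return 0;
	}
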
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 644e3f14f8ca..98b6d02a36c9 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -1919,6 +1919,16 @@ struct snd_kcontrol *snd_hda_find_mixer_ctl(struct hda_codec *codec,
 }
 EXPORT_SYMBOL_HDA(snd_hda_find_mixer_ctl);
 
+static int find_empty_mixer_ctl_idx(struct hda_codec *codec, const char *name)
+{
+	int idx;
+	for (idx = 0; idx < 16; idx++) { /* 16 ctlrs should be large enough */
+		if (!_snd_hda_find_mixer_ctl(codec, name, idx))
+			return idx;
+	}
+	return -EBUSY;
+}
+
 /**
  * snd_hda_ctl_add - Add a control element and assign to the codec
  * @codec: HD-audio codec
@@ -2654,8 +2664,6 @@ static struct snd_kcontrol_new dig_mixes[] = {
 	{ } /* end */
 };
 
-#define SPDIF_MAX_IDX	4	/* 4 instances should be enough to probe */
-
 /**
  * snd_hda_create_spdif_out_ctls - create Output SPDIF-related controls
  * @codec: the HDA codec
@@ -2673,12 +2681,8 @@ int snd_hda_create_spdif_out_ctls(struct hda_codec *codec, hda_nid_t nid)
 	struct snd_kcontrol_new *dig_mix;
 	int idx;
 
-	for (idx = 0; idx < SPDIF_MAX_IDX; idx++) {
-		if (!_snd_hda_find_mixer_ctl(codec, "IEC958 Playback Switch",
-					     idx))
-			break;
-	}
-	if (idx >= SPDIF_MAX_IDX) {
+	idx = find_empty_mixer_ctl_idx(codec, "IEC958 Playback Switch");
+	if (idx < 0) {
 		printk(KERN_ERR "hda_codec: too many IEC958 outputs\n");
 		return -EBUSY;
 	}
@@ -2829,12 +2833,8 @@ int snd_hda_create_spdif_in_ctls(struct hda_codec *codec, hda_nid_t nid)
 	struct snd_kcontrol_new *dig_mix;
 	int idx;
 
-	for (idx = 0; idx < SPDIF_MAX_IDX; idx++) {
-		if (!_snd_hda_find_mixer_ctl(codec, "IEC958 Capture Switch",
-					     idx))
-			break;
-	}
-	if (idx >= SPDIF_MAX_IDX) {
+	idx = find_empty_mixer_ctl_idx(codec, "IEC958 Capture Switch");
+	if (idx < 0) {
 		printk(KERN_ERR "hda_codec: too many IEC958 inputs\n");
 		return -EBUSY;
 	}
@@ -3808,21 +3808,32 @@ int snd_hda_add_new_ctls(struct hda_codec *codec, struct snd_kcontrol_new *knew)
 
 	for (; knew->name; knew++) {
 		struct snd_kcontrol *kctl;
+		int addr = 0, idx = 0;
 		if (knew->iface == -1)	/* skip this codec private value */
 			continue;
-		kctl = snd_ctl_new1(knew, codec);
-		if (!kctl)
-			return -ENOMEM;
-		err = snd_hda_ctl_add(codec, 0, kctl);
-		if (err < 0) {
-			if (!codec->addr)
-				return err;
+		for (;;) {
 			kctl = snd_ctl_new1(knew, codec);
 			if (!kctl)
 				return -ENOMEM;
-			kctl->id.device = codec->addr;
+			if (addr > 0)
+				kctl->id.device = addr;
+			if (idx > 0)
+				kctl->id.index = idx;
 			err = snd_hda_ctl_add(codec, 0, kctl);
-			if (err < 0)
+			if (!err)
+				break;
+			/* try first with another device index corresponding to
+			 * the codec addr; if it still fails (or it's the
+			 * primary codec), then try another control index
+			 */
+			if (!addr && codec->addr)
+				addr = codec->addr;
+			else if (!idx && !knew->index) {
+				idx = find_empty_mixer_ctl_idx(codec,
+							       knew->name);
+				if (idx <= 0)
+					return err;
+			} else
 				return err;
 		}
 	}
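
The rewritten loop in snd_hda_add_new_ctls() retries a failed registration first with a device number taken from the codec address, then with the first free control index, before giving up. The same fallback ladder over a toy registry (register_ctl() and friends are stand-ins for the HDA helpers):

	#include <errno.h>
	#include <stdbool.h>
	#include <stdio.h>

	static bool taken[2][16];

	static int register_ctl(int dev, int idx)
	{
		if (taken[dev][idx])
			return -EBUSY;
		taken[dev][idx] = true;
		return 0;
	}

	static int find_empty_idx(int dev)
	{
		for (int idx = 0; idx < 16; idx++)
			if (!taken[dev][idx])
				return idx;
		return -EBUSY;
	}

	/* Try the default id, then an alternate device number, then a
	 * free index, then fail for good. */
	static int add_ctl(int codec_addr)
	{
		int dev = 0, idx = 0;

		for (;;) {
			int err = register_ctl(dev, idx);
			if (!err)
				return 0;
			if (!dev && codec_addr)
				dev = codec_addr;	/* second chance */
			else if (!idx) {
				idx = find_empty_idx(dev);	/* third chance */
				if (idx <= 0)
					return err;
			} else
				return err;
		}
	}

	int main(void)
	{
		taken[0][0] = true;		/* default slot already used */
		printf("%d\n", add_ctl(1));	/* 0: succeeded on fallback */
		return 0;
	}
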
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index b030c8eba21f..a1c4008af891 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2300,6 +2300,7 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = {
 	SND_PCI_QUIRK(0x1028, 0x01cc, "Dell D820", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x1028, 0x01de, "Dell Precision 390", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x1028, 0x01f6, "Dell Latitude 131L", POS_FIX_LPIB),
+	SND_PCI_QUIRK(0x1028, 0x0470, "Dell Inspiron 1120", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x103c, 0x306d, "HP dv3", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB),
 	SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 427da45d7906..552a09e9211f 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -14806,8 +14806,9 @@ static int alc269_resume(struct hda_codec *codec)
 
 enum {
 	ALC269_FIXUP_SONY_VAIO,
+	ALC275_FIX_SONY_VAIO_GPIO2,
 	ALC269_FIXUP_DELL_M101Z,
-	ALC269_FIXUP_LENOVO_EDGE14,
+	ALC269_FIXUP_SKU_IGNORE,
 	ALC269_FIXUP_ASUS_G73JW,
 };
 
@@ -14818,6 +14819,14 @@ static const struct alc_fixup alc269_fixups[] = {
 			{}
 		}
 	},
+	[ALC275_FIX_SONY_VAIO_GPIO2] = {
+		.verbs = (const struct hda_verb[]) {
+			{0x01, AC_VERB_SET_GPIO_MASK, 0x04},
+			{0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04},
+			{0x01, AC_VERB_SET_GPIO_DATA, 0x00},
+			{ }
+		}
+	},
 	[ALC269_FIXUP_DELL_M101Z] = {
 		.verbs = (const struct hda_verb[]) {
 			/* Enables internal speaker */
@@ -14826,7 +14835,7 @@ static const struct alc_fixup alc269_fixups[] = {
 			{}
 		}
 	},
-	[ALC269_FIXUP_LENOVO_EDGE14] = {
+	[ALC269_FIXUP_SKU_IGNORE] = {
 		.sku = ALC_FIXUP_SKU_IGNORE,
 	},
 	[ALC269_FIXUP_ASUS_G73JW] = {
@@ -14838,9 +14847,13 @@ static const struct alc_fixup alc269_fixups[] = {
 };
 
 static struct snd_pci_quirk alc269_fixup_tbl[] = {
+	SND_PCI_QUIRK(0x104d, 0x9073, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
+	SND_PCI_QUIRK(0x104d, 0x907b, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
+	SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
 	SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
-	SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_LENOVO_EDGE14),
+	SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
+	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
 	{}
 };
@@ -15091,28 +15104,29 @@ static int patch_alc269(struct hda_codec *codec)
 
 	alc_auto_parse_customize_define(codec);
 
-	coef = alc_read_coef_idx(codec, 0);
-	if ((coef & 0x00f0) == 0x0010) {
-		if (codec->bus->pci->subsystem_vendor == 0x1025 &&
-		    spec->cdefine.platform_type == 1) {
-			alc_codec_rename(codec, "ALC271X");
-			spec->codec_variant = ALC269_TYPE_ALC271X;
-		} else if ((coef & 0xf000) == 0x1000) {
-			spec->codec_variant = ALC269_TYPE_ALC270;
-		} else if ((coef & 0xf000) == 0x2000) {
-			alc_codec_rename(codec, "ALC259");
-			spec->codec_variant = ALC269_TYPE_ALC259;
-		} else if ((coef & 0xf000) == 0x3000) {
-			alc_codec_rename(codec, "ALC258");
-			spec->codec_variant = ALC269_TYPE_ALC258;
-		} else {
-			alc_codec_rename(codec, "ALC269VB");
-			spec->codec_variant = ALC269_TYPE_ALC269VB;
-		}
-	} else
-		alc_fix_pll_init(codec, 0x20, 0x04, 15);
-
-	alc269_fill_coef(codec);
+	if (codec->vendor_id == 0x10ec0269) {
+		coef = alc_read_coef_idx(codec, 0);
+		if ((coef & 0x00f0) == 0x0010) {
+			if (codec->bus->pci->subsystem_vendor == 0x1025 &&
+			    spec->cdefine.platform_type == 1) {
+				alc_codec_rename(codec, "ALC271X");
+				spec->codec_variant = ALC269_TYPE_ALC271X;
+			} else if ((coef & 0xf000) == 0x1000) {
+				spec->codec_variant = ALC269_TYPE_ALC270;
+			} else if ((coef & 0xf000) == 0x2000) {
+				alc_codec_rename(codec, "ALC259");
+				spec->codec_variant = ALC269_TYPE_ALC259;
+			} else if ((coef & 0xf000) == 0x3000) {
+				alc_codec_rename(codec, "ALC258");
+				spec->codec_variant = ALC269_TYPE_ALC258;
+			} else {
+				alc_codec_rename(codec, "ALC269VB");
+				spec->codec_variant = ALC269_TYPE_ALC269VB;
+			}
+		} else
+			alc_fix_pll_init(codec, 0x20, 0x04, 15);
+		alc269_fill_coef(codec);
+	}
 
 	board_config = snd_hda_check_board_config(codec, ALC269_MODEL_LAST,
 						  alc269_models,
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index efa4225f5fd6..f03b2ff90496 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -3481,6 +3481,8 @@ static int stac92xx_auto_create_dmic_input_ctls(struct hda_codec *codec,
 
 		label = hda_get_input_pin_label(codec, nid, 1);
 		snd_hda_add_imux_item(dimux, label, index, &type_idx);
+		if (snd_hda_get_bool_hint(codec, "separate_dmux") != 1)
+			snd_hda_add_imux_item(imux, label, index, &type_idx);
 
 		err = create_elem_capture_vol(codec, nid, label, type_idx,
 					      HDA_INPUT);
@@ -3492,9 +3494,6 @@ static int stac92xx_auto_create_dmic_input_ctls(struct hda_codec *codec,
 			if (err < 0)
 				return err;
 		}
-
-		if (snd_hda_get_bool_hint(codec, "separate_dmux") != 1)
-			snd_hda_add_imux_item(imux, label, index, NULL);
 	}
 
 	return 0;
diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c
index d63e28773eb1..6447dbb2f123 100644
--- a/sound/soc/codecs/max98088.c
+++ b/sound/soc/codecs/max98088.c
@@ -40,7 +40,6 @@ struct max98088_cdata {
 };
 
 struct max98088_priv {
-	u8 reg_cache[M98088_REG_CNT];
 	enum max98088_type devtype;
 	void *control_data;
 	struct max98088_pdata *pdata;
@@ -1588,7 +1587,7 @@ static int max98088_dai2_set_fmt(struct snd_soc_dai *codec_dai,
 
 static void max98088_sync_cache(struct snd_soc_codec *codec)
 {
-	struct max98088_priv *max98088 = snd_soc_codec_get_drvdata(codec);
+	u16 *reg_cache = codec->reg_cache;
 	int i;
 
 	if (!codec->cache_sync)
@@ -1599,14 +1598,14 @@ static void max98088_sync_cache(struct snd_soc_codec *codec)
 	/* write back cached values if they're writeable and
 	 * different from the hardware default.
 	 */
-	for (i = 1; i < ARRAY_SIZE(max98088->reg_cache); i++) {
+	for (i = 1; i < codec->driver->reg_cache_size; i++) {
 		if (!max98088_access[i].writable)
 			continue;
 
-		if (max98088->reg_cache[i] == max98088_reg[i])
+		if (reg_cache[i] == max98088_reg[i])
 			continue;
 
-		snd_soc_write(codec, i, max98088->reg_cache[i]);
+		snd_soc_write(codec, i, reg_cache[i]);
 	}
 
 	codec->cache_sync = 0;
@@ -1951,7 +1950,6 @@ static int max98088_probe(struct snd_soc_codec *codec)
 	int ret = 0;
 
 	codec->cache_sync = 1;
-	memcpy(codec->reg_cache, max98088_reg, sizeof(max98088_reg));
 
 	ret = snd_soc_codec_set_cache_io(codec, 8, 8, SND_SOC_I2C);
 	if (ret != 0) {
diff --git a/sound/soc/codecs/wm8523.c b/sound/soc/codecs/wm8523.c
index 9a433a5396cb..deca79ea2b4b 100644
--- a/sound/soc/codecs/wm8523.c
+++ b/sound/soc/codecs/wm8523.c
@@ -41,7 +41,6 @@ static const char *wm8523_supply_names[WM8523_NUM_SUPPLIES] = {
 /* codec private data */
 struct wm8523_priv {
 	enum snd_soc_control_type control_type;
-	u16 reg_cache[WM8523_REGISTER_COUNT];
 	struct regulator_bulk_data supplies[WM8523_NUM_SUPPLIES];
 	unsigned int sysclk;
 	unsigned int rate_constraint_list[WM8523_NUM_RATES];
@@ -314,6 +313,7 @@ static int wm8523_set_bias_level(struct snd_soc_codec *codec,
 				 enum snd_soc_bias_level level)
 {
 	struct wm8523_priv *wm8523 = snd_soc_codec_get_drvdata(codec);
+	u16 *reg_cache = codec->reg_cache;
 	int ret, i;
 
 	switch (level) {
@@ -344,7 +344,7 @@ static int wm8523_set_bias_level(struct snd_soc_codec *codec,
 			/* Sync back default/cached values */
 			for (i = WM8523_AIF_CTRL1;
 			     i < WM8523_MAX_REGISTER; i++)
-				snd_soc_write(codec, i, wm8523->reg_cache[i]);
+				snd_soc_write(codec, i, reg_cache[i]);
 
 
 			msleep(100);
@@ -414,6 +414,7 @@ static int wm8523_resume(struct snd_soc_codec *codec)
 static int wm8523_probe(struct snd_soc_codec *codec)
 {
 	struct wm8523_priv *wm8523 = snd_soc_codec_get_drvdata(codec);
+	u16 *reg_cache = codec->reg_cache;
 	int ret, i;
 
 	codec->hw_write = (hw_write_t)i2c_master_send;
@@ -470,8 +471,8 @@ static int wm8523_probe(struct snd_soc_codec *codec)
 	}
 
 	/* Change some default settings - latch VU and enable ZC */
-	wm8523->reg_cache[WM8523_DAC_GAINR] |= WM8523_DACR_VU;
-	wm8523->reg_cache[WM8523_DAC_CTRL3] |= WM8523_ZC;
+	reg_cache[WM8523_DAC_GAINR] |= WM8523_DACR_VU;
+	reg_cache[WM8523_DAC_CTRL3] |= WM8523_ZC;
 
 	wm8523_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
diff --git a/sound/soc/codecs/wm8741.c b/sound/soc/codecs/wm8741.c
index 90e31e9aa6f7..aea60ef8aba7 100644
--- a/sound/soc/codecs/wm8741.c
+++ b/sound/soc/codecs/wm8741.c
@@ -41,7 +41,6 @@ static const char *wm8741_supply_names[WM8741_NUM_SUPPLIES] = {
 /* codec private data */
 struct wm8741_priv {
 	enum snd_soc_control_type control_type;
-	u16 reg_cache[WM8741_REGISTER_COUNT];
 	struct regulator_bulk_data supplies[WM8741_NUM_SUPPLIES];
 	unsigned int sysclk;
 	struct snd_pcm_hw_constraint_list *sysclk_constraints;
@@ -422,6 +421,7 @@ static int wm8741_resume(struct snd_soc_codec *codec)
 static int wm8741_probe(struct snd_soc_codec *codec)
 {
 	struct wm8741_priv *wm8741 = snd_soc_codec_get_drvdata(codec);
+	u16 *reg_cache = codec->reg_cache;
 	int ret = 0;
 
 	ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8741->control_type);
@@ -437,10 +437,10 @@ static int wm8741_probe(struct snd_soc_codec *codec)
 	}
 
 	/* Change some default settings - latch VU */
-	wm8741->reg_cache[WM8741_DACLLSB_ATTENUATION] |= WM8741_UPDATELL;
-	wm8741->reg_cache[WM8741_DACLMSB_ATTENUATION] |= WM8741_UPDATELM;
-	wm8741->reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERL;
-	wm8741->reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERM;
+	reg_cache[WM8741_DACLLSB_ATTENUATION] |= WM8741_UPDATELL;
+	reg_cache[WM8741_DACLMSB_ATTENUATION] |= WM8741_UPDATELM;
+	reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERL;
+	reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERM;
 
 	snd_soc_add_controls(codec, wm8741_snd_controls,
 			     ARRAY_SIZE(wm8741_snd_controls));
diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c
index 8f679a13f2bc..87caae59e939 100644
--- a/sound/soc/codecs/wm8753.c
+++ b/sound/soc/codecs/wm8753.c
@@ -65,22 +65,22 @@ static void wm8753_set_dai_mode(struct snd_soc_codec *codec,
  * are using 2 wire for device control, so we cache them instead.
  */
 static const u16 wm8753_reg[] = {
-	0x0008, 0x0000, 0x000a, 0x000a,
-	0x0033, 0x0000, 0x0007, 0x00ff,
-	0x00ff, 0x000f, 0x000f, 0x007b,
-	0x0000, 0x0032, 0x0000, 0x00c3,
-	0x00c3, 0x00c0, 0x0000, 0x0000,
+	0x0000, 0x0008, 0x0000, 0x000a,
+	0x000a, 0x0033, 0x0000, 0x0007,
+	0x00ff, 0x00ff, 0x000f, 0x000f,
+	0x007b, 0x0000, 0x0032, 0x0000,
+	0x00c3, 0x00c3, 0x00c0, 0x0000,
 	0x0000, 0x0000, 0x0000, 0x0000,
 	0x0000, 0x0000, 0x0000, 0x0000,
-	0x0000, 0x0000, 0x0000, 0x0055,
-	0x0005, 0x0050, 0x0055, 0x0050,
-	0x0055, 0x0050, 0x0055, 0x0079,
-	0x0079, 0x0079, 0x0079, 0x0079,
 	0x0000, 0x0000, 0x0000, 0x0000,
-	0x0097, 0x0097, 0x0000, 0x0004,
-	0x0000, 0x0083, 0x0024, 0x01ba,
-	0x0000, 0x0083, 0x0024, 0x01ba,
-	0x0000, 0x0000, 0x0000
+	0x0055, 0x0005, 0x0050, 0x0055,
+	0x0050, 0x0055, 0x0050, 0x0055,
+	0x0079, 0x0079, 0x0079, 0x0079,
+	0x0079, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0097, 0x0097, 0x0000,
+	0x0004, 0x0000, 0x0083, 0x0024,
+	0x01ba, 0x0000, 0x0083, 0x0024,
+	0x01ba, 0x0000, 0x0000, 0x0000
 };
 
 /* codec private data */
@@ -88,57 +88,10 @@ struct wm8753_priv {
 	enum snd_soc_control_type control_type;
 	unsigned int sysclk;
 	unsigned int pcmclk;
-	u16 reg_cache[ARRAY_SIZE(wm8753_reg)];
 	int dai_func;
 };
 
-/*
- * read wm8753 register cache
- */
-static inline unsigned int wm8753_read_reg_cache(struct snd_soc_codec *codec,
-	unsigned int reg)
-{
-	u16 *cache = codec->reg_cache;
-	if (reg < 1 || reg >= (ARRAY_SIZE(wm8753_reg) + 1))
-		return -1;
-	return cache[reg - 1];
-}
-
-/*
- * write wm8753 register cache
- */
-static inline void wm8753_write_reg_cache(struct snd_soc_codec *codec,
-	unsigned int reg, unsigned int value)
-{
-	u16 *cache = codec->reg_cache;
-	if (reg < 1 || reg >= (ARRAY_SIZE(wm8753_reg) + 1))
-		return;
-	cache[reg - 1] = value;
-}
-
-/*
- * write to the WM8753 register space
- */
-static int wm8753_write(struct snd_soc_codec *codec, unsigned int reg,
-	unsigned int value)
-{
-	u8 data[2];
-
-	/* data is
-	 *   D15..D9 WM8753 register offset
-	 *   D8...D0 register data
-	 */
-	data[0] = (reg << 1) | ((value >> 8) & 0x0001);
-	data[1] = value & 0x00ff;
-
-	wm8753_write_reg_cache(codec, reg, value);
-	if (codec->hw_write(codec->control_data, data, 2) == 2)
-		return 0;
-	else
-		return -EIO;
-}
-
-#define wm8753_reset(c) wm8753_write(c, WM8753_RESET, 0)
+#define wm8753_reset(c) snd_soc_write(c, WM8753_RESET, 0)
 
 /*
  * WM8753 Controls
@@ -218,7 +171,7 @@ static int wm8753_get_dai(struct snd_kcontrol *kcontrol,
 			   struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
-	int mode = wm8753_read_reg_cache(codec, WM8753_IOCTL);
+	int mode = snd_soc_read(codec, WM8753_IOCTL);
 
 	ucontrol->value.integer.value[0] = (mode & 0xc) >> 2;
 	return 0;
@@ -228,7 +181,7 @@ static int wm8753_set_dai(struct snd_kcontrol *kcontrol,
 			   struct snd_ctl_elem_value *ucontrol)
 {
 	struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
-	int mode = wm8753_read_reg_cache(codec, WM8753_IOCTL);
+	int mode = snd_soc_read(codec, WM8753_IOCTL);
 	struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 
 	if (((mode & 0xc) >> 2) == ucontrol->value.integer.value[0])
@@ -738,17 +691,17 @@ static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id,
 	if (pll_id == WM8753_PLL1) {
 		offset = 0;
 		enable = 0x10;
-		reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xffef;
+		reg = snd_soc_read(codec, WM8753_CLOCK) & 0xffef;
 	} else {
 		offset = 4;
 		enable = 0x8;
-		reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfff7;
+		reg = snd_soc_read(codec, WM8753_CLOCK) & 0xfff7;
 	}
 
 	if (!freq_in || !freq_out) {
 		/* disable PLL */
-		wm8753_write(codec, WM8753_PLL1CTL1 + offset, 0x0026);
-		wm8753_write(codec, WM8753_CLOCK, reg);
+		snd_soc_write(codec, WM8753_PLL1CTL1 + offset, 0x0026);
+		snd_soc_write(codec, WM8753_CLOCK, reg);
 		return 0;
 	} else {
 		u16 value = 0;
@@ -759,20 +712,20 @@ static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id,
 		/* set up N and K PLL divisor ratios */
 		/* bits 8:5 = PLL_N, bits 3:0 = PLL_K[21:18] */
 		value = (pll_div.n << 5) + ((pll_div.k & 0x3c0000) >> 18);
-		wm8753_write(codec, WM8753_PLL1CTL2 + offset, value);
+		snd_soc_write(codec, WM8753_PLL1CTL2 + offset, value);
 
 		/* bits 8:0 = PLL_K[17:9] */
 		value = (pll_div.k & 0x03fe00) >> 9;
-		wm8753_write(codec, WM8753_PLL1CTL3 + offset, value);
+		snd_soc_write(codec, WM8753_PLL1CTL3 + offset, value);
 
 		/* bits 8:0 = PLL_K[8:0] */
 		value = pll_div.k & 0x0001ff;
-		wm8753_write(codec, WM8753_PLL1CTL4 + offset, value);
+		snd_soc_write(codec, WM8753_PLL1CTL4 + offset, value);
 
 		/* set PLL as input and enable */
-		wm8753_write(codec, WM8753_PLL1CTL1 + offset, 0x0027 |
+		snd_soc_write(codec, WM8753_PLL1CTL1 + offset, 0x0027 |
 			(pll_div.div2 << 3));
-		wm8753_write(codec, WM8753_CLOCK, reg | enable);
+		snd_soc_write(codec, WM8753_CLOCK, reg | enable);
 	}
 	return 0;
 }
@@ -879,7 +832,7 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai,
 			       unsigned int fmt)
 {
 	struct snd_soc_codec *codec = codec_dai->codec;
-	u16 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x01ec;
+	u16 voice = snd_soc_read(codec, WM8753_PCM) & 0x01ec;
 
 	/* interface format */
 	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
@@ -901,7 +854,7 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai,
 		return -EINVAL;
 	}
 
-	wm8753_write(codec, WM8753_PCM, voice);
+	snd_soc_write(codec, WM8753_PCM, voice);
 	return 0;
 }
 
@@ -922,8 +875,8 @@ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream,
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_codec *codec = rtd->codec;
 	struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
-	u16 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x01f3;
-	u16 srate = wm8753_read_reg_cache(codec, WM8753_SRATE1) & 0x017f;
+	u16 voice = snd_soc_read(codec, WM8753_PCM) & 0x01f3;
+	u16 srate = snd_soc_read(codec, WM8753_SRATE1) & 0x017f;
 
 	/* bit size */
 	switch (params_format(params)) {
@@ -943,9 +896,9 @@ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream,
943 /* sample rate */ 896 /* sample rate */
944 if (params_rate(params) * 384 == wm8753->pcmclk) 897 if (params_rate(params) * 384 == wm8753->pcmclk)
945 srate |= 0x80; 898 srate |= 0x80;
946 wm8753_write(codec, WM8753_SRATE1, srate); 899 snd_soc_write(codec, WM8753_SRATE1, srate);
947 900
948 wm8753_write(codec, WM8753_PCM, voice); 901 snd_soc_write(codec, WM8753_PCM, voice);
949 return 0; 902 return 0;
950} 903}
951 904
@@ -958,8 +911,8 @@ static int wm8753_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai,
958 struct snd_soc_codec *codec = codec_dai->codec; 911 struct snd_soc_codec *codec = codec_dai->codec;
959 u16 voice, ioctl; 912 u16 voice, ioctl;
960 913
961 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x011f; 914 voice = snd_soc_read(codec, WM8753_PCM) & 0x011f;
962 ioctl = wm8753_read_reg_cache(codec, WM8753_IOCTL) & 0x015d; 915 ioctl = snd_soc_read(codec, WM8753_IOCTL) & 0x015d;
963 916
964 /* set master/slave audio interface */ 917 /* set master/slave audio interface */
965 switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { 918 switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
@@ -1013,8 +966,8 @@ static int wm8753_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai,
1013 return -EINVAL; 966 return -EINVAL;
1014 } 967 }
1015 968
1016 wm8753_write(codec, WM8753_PCM, voice); 969 snd_soc_write(codec, WM8753_PCM, voice);
1017 wm8753_write(codec, WM8753_IOCTL, ioctl); 970 snd_soc_write(codec, WM8753_IOCTL, ioctl);
1018 return 0; 971 return 0;
1019} 972}
1020 973
@@ -1026,16 +979,16 @@ static int wm8753_set_dai_clkdiv(struct snd_soc_dai *codec_dai,
1026 979
1027 switch (div_id) { 980 switch (div_id) {
1028 case WM8753_PCMDIV: 981 case WM8753_PCMDIV:
1029 reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0x003f; 982 reg = snd_soc_read(codec, WM8753_CLOCK) & 0x003f;
1030 wm8753_write(codec, WM8753_CLOCK, reg | div); 983 snd_soc_write(codec, WM8753_CLOCK, reg | div);
1031 break; 984 break;
1032 case WM8753_BCLKDIV: 985 case WM8753_BCLKDIV:
1033 reg = wm8753_read_reg_cache(codec, WM8753_SRATE2) & 0x01c7; 986 reg = snd_soc_read(codec, WM8753_SRATE2) & 0x01c7;
1034 wm8753_write(codec, WM8753_SRATE2, reg | div); 987 snd_soc_write(codec, WM8753_SRATE2, reg | div);
1035 break; 988 break;
1036 case WM8753_VXCLKDIV: 989 case WM8753_VXCLKDIV:
1037 reg = wm8753_read_reg_cache(codec, WM8753_SRATE2) & 0x003f; 990 reg = snd_soc_read(codec, WM8753_SRATE2) & 0x003f;
1038 wm8753_write(codec, WM8753_SRATE2, reg | div); 991 snd_soc_write(codec, WM8753_SRATE2, reg | div);
1039 break; 992 break;
1040 default: 993 default:
1041 return -EINVAL; 994 return -EINVAL;
@@ -1050,7 +1003,7 @@ static int wm8753_hdac_set_dai_fmt(struct snd_soc_dai *codec_dai,
1050 unsigned int fmt) 1003 unsigned int fmt)
1051{ 1004{
1052 struct snd_soc_codec *codec = codec_dai->codec; 1005 struct snd_soc_codec *codec = codec_dai->codec;
1053 u16 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x01e0; 1006 u16 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x01e0;
1054 1007
1055 /* interface format */ 1008 /* interface format */
1056 switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { 1009 switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
@@ -1072,7 +1025,7 @@ static int wm8753_hdac_set_dai_fmt(struct snd_soc_dai *codec_dai,
1072 return -EINVAL; 1025 return -EINVAL;
1073 } 1026 }
1074 1027
1075 wm8753_write(codec, WM8753_HIFI, hifi); 1028 snd_soc_write(codec, WM8753_HIFI, hifi);
1076 return 0; 1029 return 0;
1077} 1030}
1078 1031
@@ -1085,8 +1038,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai,
1085 struct snd_soc_codec *codec = codec_dai->codec; 1038 struct snd_soc_codec *codec = codec_dai->codec;
1086 u16 ioctl, hifi; 1039 u16 ioctl, hifi;
1087 1040
1088 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x011f; 1041 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x011f;
1089 ioctl = wm8753_read_reg_cache(codec, WM8753_IOCTL) & 0x00ae; 1042 ioctl = snd_soc_read(codec, WM8753_IOCTL) & 0x00ae;
1090 1043
1091 /* set master/slave audio interface */ 1044 /* set master/slave audio interface */
1092 switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { 1045 switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
@@ -1140,8 +1093,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai,
1140 return -EINVAL; 1093 return -EINVAL;
1141 } 1094 }
1142 1095
1143 wm8753_write(codec, WM8753_HIFI, hifi); 1096 snd_soc_write(codec, WM8753_HIFI, hifi);
1144 wm8753_write(codec, WM8753_IOCTL, ioctl); 1097 snd_soc_write(codec, WM8753_IOCTL, ioctl);
1145 return 0; 1098 return 0;
1146} 1099}
1147 1100
@@ -1162,8 +1115,8 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
1162 struct snd_soc_pcm_runtime *rtd = substream->private_data; 1115 struct snd_soc_pcm_runtime *rtd = substream->private_data;
1163 struct snd_soc_codec *codec = rtd->codec; 1116 struct snd_soc_codec *codec = rtd->codec;
1164 struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec); 1117 struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
1165 u16 srate = wm8753_read_reg_cache(codec, WM8753_SRATE1) & 0x01c0; 1118 u16 srate = snd_soc_read(codec, WM8753_SRATE1) & 0x01c0;
1166 u16 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x01f3; 1119 u16 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x01f3;
1167 int coeff; 1120 int coeff;
1168 1121
1169 /* is digital filter coefficient valid ? */ 1122 /* is digital filter coefficient valid ? */
@@ -1172,7 +1125,7 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
1172 printk(KERN_ERR "wm8753 invalid MCLK or rate\n"); 1125 printk(KERN_ERR "wm8753 invalid MCLK or rate\n");
1173 return coeff; 1126 return coeff;
1174 } 1127 }
1175 wm8753_write(codec, WM8753_SRATE1, srate | (coeff_div[coeff].sr << 1) | 1128 snd_soc_write(codec, WM8753_SRATE1, srate | (coeff_div[coeff].sr << 1) |
1176 coeff_div[coeff].usb); 1129 coeff_div[coeff].usb);
1177 1130
1178 /* bit size */ 1131 /* bit size */
@@ -1190,7 +1143,7 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
1190 break; 1143 break;
1191 } 1144 }
1192 1145
1193 wm8753_write(codec, WM8753_HIFI, hifi); 1146 snd_soc_write(codec, WM8753_HIFI, hifi);
1194 return 0; 1147 return 0;
1195} 1148}
1196 1149
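Editor's note: the coefficient lookup used above (get_coeff, outside this hunk) maps an MCLK/sample-rate pair to a row of coeff_div[] whose sr and usb fields are then folded into the SRATE1 write. A hedged stand-alone sketch of that kind of table search; the struct layout and example rows are invented for illustration:

    #include <stddef.h>

    struct coeff_entry {
            unsigned int mclk;      /* MCLK in Hz */
            unsigned int rate;      /* sample rate in Hz */
            unsigned char sr;       /* SR field merged into SRATE1 above */
            unsigned char usb;      /* USB-mode bit */
    };

    static const struct coeff_entry coeff_table[] = {
            { 12288000, 48000, 0x00, 0 },   /* illustrative rows only */
            { 11289600, 44100, 0x10, 0 },
            { 12000000, 48000, 0x00, 1 },   /* 12 MHz USB mode */
    };

    /* Return the table index for a mclk/rate pair, or -1 if unsupported. */
    static int lookup_coeff(unsigned int mclk, unsigned int rate)
    {
            size_t i;

            for (i = 0; i < sizeof(coeff_table) / sizeof(coeff_table[0]); i++)
                    if (coeff_table[i].mclk == mclk &&
                        coeff_table[i].rate == rate)
                            return (int)i;
            return -1;
    }
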
@@ -1201,8 +1154,8 @@ static int wm8753_mode1v_set_dai_fmt(struct snd_soc_dai *codec_dai,
1201 u16 clock; 1154 u16 clock;
1202 1155
1203 /* set clk source as pcmclk */ 1156 /* set clk source as pcmclk */
1204 clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb; 1157 clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
1205 wm8753_write(codec, WM8753_CLOCK, clock); 1158 snd_soc_write(codec, WM8753_CLOCK, clock);
1206 1159
1207 if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0) 1160 if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0)
1208 return -EINVAL; 1161 return -EINVAL;
@@ -1224,8 +1177,8 @@ static int wm8753_mode2_set_dai_fmt(struct snd_soc_dai *codec_dai,
1224 u16 clock; 1177 u16 clock;
1225 1178
1226 /* set clk source as pcmclk */ 1179 /* set clk source as pcmclk */
1227 clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb; 1180 clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
1228 wm8753_write(codec, WM8753_CLOCK, clock); 1181 snd_soc_write(codec, WM8753_CLOCK, clock);
1229 1182
1230 if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0) 1183 if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0)
1231 return -EINVAL; 1184 return -EINVAL;
@@ -1239,8 +1192,8 @@ static int wm8753_mode3_4_set_dai_fmt(struct snd_soc_dai *codec_dai,
1239 u16 clock; 1192 u16 clock;
1240 1193
1241 /* set clk source as mclk */ 1194 /* set clk source as mclk */
1242 clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb; 1195 clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
1243 wm8753_write(codec, WM8753_CLOCK, clock | 0x4); 1196 snd_soc_write(codec, WM8753_CLOCK, clock | 0x4);
1244 1197
1245 if (wm8753_hdac_set_dai_fmt(codec_dai, fmt) < 0) 1198 if (wm8753_hdac_set_dai_fmt(codec_dai, fmt) < 0)
1246 return -EINVAL; 1199 return -EINVAL;
@@ -1252,19 +1205,19 @@ static int wm8753_mode3_4_set_dai_fmt(struct snd_soc_dai *codec_dai,
1252static int wm8753_mute(struct snd_soc_dai *dai, int mute) 1205static int wm8753_mute(struct snd_soc_dai *dai, int mute)
1253{ 1206{
1254 struct snd_soc_codec *codec = dai->codec; 1207 struct snd_soc_codec *codec = dai->codec;
1255 u16 mute_reg = wm8753_read_reg_cache(codec, WM8753_DAC) & 0xfff7; 1208 u16 mute_reg = snd_soc_read(codec, WM8753_DAC) & 0xfff7;
1256 struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec); 1209 struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
1257 1210
1258 /* the digital mute covers the HiFi and Voice DAC's on the WM8753. 1211 /* the digital mute covers the HiFi and Voice DAC's on the WM8753.
1259 * make sure we check if they are not both active when we mute */ 1212 * make sure we check if they are not both active when we mute */
1260 if (mute && wm8753->dai_func == 1) { 1213 if (mute && wm8753->dai_func == 1) {
1261 if (!codec->active) 1214 if (!codec->active)
1262 wm8753_write(codec, WM8753_DAC, mute_reg | 0x8); 1215 snd_soc_write(codec, WM8753_DAC, mute_reg | 0x8);
1263 } else { 1216 } else {
1264 if (mute) 1217 if (mute)
1265 wm8753_write(codec, WM8753_DAC, mute_reg | 0x8); 1218 snd_soc_write(codec, WM8753_DAC, mute_reg | 0x8);
1266 else 1219 else
1267 wm8753_write(codec, WM8753_DAC, mute_reg); 1220 snd_soc_write(codec, WM8753_DAC, mute_reg);
1268 } 1221 }
1269 1222
1270 return 0; 1223 return 0;
@@ -1273,23 +1226,23 @@ static int wm8753_mute(struct snd_soc_dai *dai, int mute)
1273static int wm8753_set_bias_level(struct snd_soc_codec *codec, 1226static int wm8753_set_bias_level(struct snd_soc_codec *codec,
1274 enum snd_soc_bias_level level) 1227 enum snd_soc_bias_level level)
1275{ 1228{
1276 u16 pwr_reg = wm8753_read_reg_cache(codec, WM8753_PWR1) & 0xfe3e; 1229 u16 pwr_reg = snd_soc_read(codec, WM8753_PWR1) & 0xfe3e;
1277 1230
1278 switch (level) { 1231 switch (level) {
1279 case SND_SOC_BIAS_ON: 1232 case SND_SOC_BIAS_ON:
1280 /* set vmid to 50k and unmute dac */ 1233 /* set vmid to 50k and unmute dac */
1281 wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x00c0); 1234 snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x00c0);
1282 break; 1235 break;
1283 case SND_SOC_BIAS_PREPARE: 1236 case SND_SOC_BIAS_PREPARE:
1284 /* set vmid to 5k for quick power up */ 1237 /* set vmid to 5k for quick power up */
1285 wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x01c1); 1238 snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x01c1);
1286 break; 1239 break;
1287 case SND_SOC_BIAS_STANDBY: 1240 case SND_SOC_BIAS_STANDBY:
1288 /* mute dac and set vmid to 500k, enable VREF */ 1241 /* mute dac and set vmid to 500k, enable VREF */
1289 wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x0141); 1242 snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x0141);
1290 break; 1243 break;
1291 case SND_SOC_BIAS_OFF: 1244 case SND_SOC_BIAS_OFF:
1292 wm8753_write(codec, WM8753_PWR1, 0x0001); 1245 snd_soc_write(codec, WM8753_PWR1, 0x0001);
1293 break; 1246 break;
1294 } 1247 }
1295 codec->bias_level = level; 1248 codec->bias_level = level;
@@ -1477,7 +1430,7 @@ static void wm8753_set_dai_mode(struct snd_soc_codec *codec,
1477 else 1430 else
1478 dai->driver = &wm8753_all_dai[(wm8753->dai_func << 1) + 1]; 1431 dai->driver = &wm8753_all_dai[(wm8753->dai_func << 1) + 1];
1479 } 1432 }
1480 wm8753_write(codec, WM8753_IOCTL, wm8753->dai_func); 1433 snd_soc_write(codec, WM8753_IOCTL, wm8753->dai_func);
1481} 1434}
1482 1435
1483static void wm8753_work(struct work_struct *work) 1436static void wm8753_work(struct work_struct *work)
@@ -1495,22 +1448,19 @@ static int wm8753_suspend(struct snd_soc_codec *codec, pm_message_t state)
1495 1448
1496static int wm8753_resume(struct snd_soc_codec *codec) 1449static int wm8753_resume(struct snd_soc_codec *codec)
1497{ 1450{
1451 u16 *reg_cache = codec->reg_cache;
1498 int i; 1452 int i;
1499 u8 data[2];
1500 u16 *cache = codec->reg_cache;
1501 1453
1502 /* Sync reg_cache with the hardware */ 1454 /* Sync reg_cache with the hardware */
1503 for (i = 0; i < ARRAY_SIZE(wm8753_reg); i++) { 1455 for (i = 1; i < ARRAY_SIZE(wm8753_reg); i++) {
1504 if (i + 1 == WM8753_RESET) 1456 if (i == WM8753_RESET)
1505 continue; 1457 continue;
1506 1458
1507 /* No point in writing hardware default values back */ 1459 /* No point in writing hardware default values back */
1508 if (cache[i] == wm8753_reg[i]) 1460 if (reg_cache[i] == wm8753_reg[i])
1509 continue; 1461 continue;
1510 1462
1511 data[0] = ((i + 1) << 1) | ((cache[i] >> 8) & 0x0001); 1463 snd_soc_write(codec, i, reg_cache[i]);
1512 data[1] = cache[i] & 0x00ff;
1513 codec->hw_write(codec->control_data, data, 2);
1514 } 1464 }
1515 1465
1516 wm8753_set_bias_level(codec, SND_SOC_BIAS_STANDBY); 1466 wm8753_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
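
Editor's note: the rewritten resume path is the common ASoC cache-restore idiom: start past register 0, skip the reset register, skip anything still at its hardware default, and let snd_soc_write() handle the wire format instead of hand-packing the two-byte address/data frame. A compilable stand-alone sketch of the idiom, with stand-in types rather than the kernel API:

    #include <stdint.h>
    #include <stddef.h>

    #define REG_RESET 15    /* stand-in for WM8753_RESET */

    /*
     * Restore every cached register that differs from its power-on
     * default, skipping the write-only reset register. write_reg()
     * stands in for snd_soc_write().
     */
    static void sync_reg_cache(const uint16_t *cache,
                               const uint16_t *defaults, size_t n,
                               void (*write_reg)(unsigned int reg,
                                                 uint16_t val))
    {
            size_t reg;

            for (reg = 1; reg < n; reg++) {
                    if (reg == REG_RESET)
                            continue;
                    if (cache[reg] == defaults[reg])
                            continue;   /* hardware already holds this */
                    write_reg(reg, cache[reg]);
            }
    }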
@@ -1548,7 +1498,7 @@ static int run_delayed_work(struct delayed_work *dwork)
1548static int wm8753_probe(struct snd_soc_codec *codec) 1498static int wm8753_probe(struct snd_soc_codec *codec)
1549{ 1499{
1550 struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec); 1500 struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
1551 int ret = 0, reg; 1501 int ret;
1552 1502
1553 INIT_DELAYED_WORK(&codec->delayed_work, wm8753_work); 1503 INIT_DELAYED_WORK(&codec->delayed_work, wm8753_work);
1554 1504
@@ -1573,26 +1523,16 @@ static int wm8753_probe(struct snd_soc_codec *codec)
1573 msecs_to_jiffies(caps_charge)); 1523 msecs_to_jiffies(caps_charge));
1574 1524
1575 /* set the update bits */ 1525 /* set the update bits */
1576 reg = wm8753_read_reg_cache(codec, WM8753_LDAC); 1526 snd_soc_update_bits(codec, WM8753_LDAC, 0x0100, 0x0100);
1577 wm8753_write(codec, WM8753_LDAC, reg | 0x0100); 1527 snd_soc_update_bits(codec, WM8753_RDAC, 0x0100, 0x0100);
1578 reg = wm8753_read_reg_cache(codec, WM8753_RDAC); 1528 snd_soc_update_bits(codec, WM8753_LADC, 0x0100, 0x0100);
1579 wm8753_write(codec, WM8753_RDAC, reg | 0x0100); 1529 snd_soc_update_bits(codec, WM8753_RADC, 0x0100, 0x0100);
1580 reg = wm8753_read_reg_cache(codec, WM8753_LADC); 1530 snd_soc_update_bits(codec, WM8753_LOUT1V, 0x0100, 0x0100);
1581 wm8753_write(codec, WM8753_LADC, reg | 0x0100); 1531 snd_soc_update_bits(codec, WM8753_ROUT1V, 0x0100, 0x0100);
1582 reg = wm8753_read_reg_cache(codec, WM8753_RADC); 1532 snd_soc_update_bits(codec, WM8753_LOUT2V, 0x0100, 0x0100);
1583 wm8753_write(codec, WM8753_RADC, reg | 0x0100); 1533 snd_soc_update_bits(codec, WM8753_ROUT2V, 0x0100, 0x0100);
1584 reg = wm8753_read_reg_cache(codec, WM8753_LOUT1V); 1534 snd_soc_update_bits(codec, WM8753_LINVOL, 0x0100, 0x0100);
1585 wm8753_write(codec, WM8753_LOUT1V, reg | 0x0100); 1535 snd_soc_update_bits(codec, WM8753_RINVOL, 0x0100, 0x0100);
1586 reg = wm8753_read_reg_cache(codec, WM8753_ROUT1V);
1587 wm8753_write(codec, WM8753_ROUT1V, reg | 0x0100);
1588 reg = wm8753_read_reg_cache(codec, WM8753_LOUT2V);
1589 wm8753_write(codec, WM8753_LOUT2V, reg | 0x0100);
1590 reg = wm8753_read_reg_cache(codec, WM8753_ROUT2V);
1591 wm8753_write(codec, WM8753_ROUT2V, reg | 0x0100);
1592 reg = wm8753_read_reg_cache(codec, WM8753_LINVOL);
1593 wm8753_write(codec, WM8753_LINVOL, reg | 0x0100);
1594 reg = wm8753_read_reg_cache(codec, WM8753_RINVOL);
1595 wm8753_write(codec, WM8753_RINVOL, reg | 0x0100);
1596 1536
1597 snd_soc_add_controls(codec, wm8753_snd_controls, 1537 snd_soc_add_controls(codec, wm8753_snd_controls,
1598 ARRAY_SIZE(wm8753_snd_controls)); 1538 ARRAY_SIZE(wm8753_snd_controls));
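
Editor's note: each snd_soc_update_bits(codec, reg, 0x0100, 0x0100) call in this hunk replaces a read-cache/OR/write pair with one read-modify-write that sets only the volume-update latch bit. Roughly, ignoring the real helper's return value and locking, it behaves like this sketch (the real call also keeps the register cache in sync):

    #include <stdint.h>

    /*
     * Simplified model of snd_soc_update_bits(codec, reg, mask, val):
     * read the cached value, substitute the masked bits, and write
     * back only if something actually changed.
     */
    static void update_bits(uint16_t *reg_cache, unsigned int reg,
                            uint16_t mask, uint16_t val,
                            void (*write_reg)(unsigned int reg,
                                              uint16_t val))
    {
            uint16_t old = reg_cache[reg];
            uint16_t new = (old & ~mask) | (val & mask);

            if (new != old)
                    write_reg(reg, new);
    }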
diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index 9001cc48ba13..1ec12eff0620 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c
@@ -50,8 +50,6 @@ static const char *wm8904_supply_names[WM8904_NUM_SUPPLIES] = {
50/* codec private data */ 50/* codec private data */
51struct wm8904_priv { 51struct wm8904_priv {
52 52
53 u16 reg_cache[WM8904_MAX_REGISTER + 1];
54
55 enum wm8904_type devtype; 53 enum wm8904_type devtype;
56 void *control_data; 54 void *control_data;
57 55
@@ -2094,7 +2092,7 @@ static int wm8904_digital_mute(struct snd_soc_dai *codec_dai, int mute)
2094 2092
2095static void wm8904_sync_cache(struct snd_soc_codec *codec) 2093static void wm8904_sync_cache(struct snd_soc_codec *codec)
2096{ 2094{
2097 struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); 2095 u16 *reg_cache = codec->reg_cache;
2098 int i; 2096 int i;
2099 2097
2100 if (!codec->cache_sync) 2098 if (!codec->cache_sync)
@@ -2105,14 +2103,14 @@ static void wm8904_sync_cache(struct snd_soc_codec *codec)
2105 /* Sync back cached values if they're different from the 2103 /* Sync back cached values if they're different from the
2106 * hardware default. 2104 * hardware default.
2107 */ 2105 */
2108 for (i = 1; i < ARRAY_SIZE(wm8904->reg_cache); i++) { 2106 for (i = 1; i < codec->driver->reg_cache_size; i++) {
2109 if (!wm8904_access[i].writable) 2107 if (!wm8904_access[i].writable)
2110 continue; 2108 continue;
2111 2109
2112 if (wm8904->reg_cache[i] == wm8904_reg[i]) 2110 if (reg_cache[i] == wm8904_reg[i])
2113 continue; 2111 continue;
2114 2112
2115 snd_soc_write(codec, i, wm8904->reg_cache[i]); 2113 snd_soc_write(codec, i, reg_cache[i]);
2116 } 2114 }
2117 2115
2118 codec->cache_sync = 0; 2116 codec->cache_sync = 0;
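
Editor's note: once the private reg_cache[] array is dropped in favour of the core-allocated cache, the loop bound has to come from codec->driver->reg_cache_size. ARRAY_SIZE() only works on true arrays; the kernel's version of the macro refuses to build on a plain u16 *, and in standard C it would instead silently yield sizeof(u16 *)/sizeof(u16). A short illustration of the trap:

    #include <stdint.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    uint16_t array_cache[256];
    uint16_t *ptr_cache = array_cache;

    /*
     * ARRAY_SIZE(array_cache) == 256, but ARRAY_SIZE(ptr_cache) is
     * sizeof(uint16_t *) / sizeof(uint16_t), i.e. 4 on a typical
     * 64-bit build, so the bound must come from an explicit size
     * field once the cache is reached through a pointer.
     */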
@@ -2371,6 +2369,7 @@ static int wm8904_probe(struct snd_soc_codec *codec)
2371{ 2369{
2372 struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); 2370 struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
2373 struct wm8904_pdata *pdata = wm8904->pdata; 2371 struct wm8904_pdata *pdata = wm8904->pdata;
2372 u16 *reg_cache = codec->reg_cache;
2374 int ret, i; 2373 int ret, i;
2375 2374
2376 codec->cache_sync = 1; 2375 codec->cache_sync = 1;
@@ -2437,19 +2436,19 @@ static int wm8904_probe(struct snd_soc_codec *codec)
2437 } 2436 }
2438 2437
2439 /* Change some default settings - latch VU and enable ZC */ 2438 /* Change some default settings - latch VU and enable ZC */
2440 wm8904->reg_cache[WM8904_ADC_DIGITAL_VOLUME_LEFT] |= WM8904_ADC_VU; 2439 reg_cache[WM8904_ADC_DIGITAL_VOLUME_LEFT] |= WM8904_ADC_VU;
2441 wm8904->reg_cache[WM8904_ADC_DIGITAL_VOLUME_RIGHT] |= WM8904_ADC_VU; 2440 reg_cache[WM8904_ADC_DIGITAL_VOLUME_RIGHT] |= WM8904_ADC_VU;
2442 wm8904->reg_cache[WM8904_DAC_DIGITAL_VOLUME_LEFT] |= WM8904_DAC_VU; 2441 reg_cache[WM8904_DAC_DIGITAL_VOLUME_LEFT] |= WM8904_DAC_VU;
2443 wm8904->reg_cache[WM8904_DAC_DIGITAL_VOLUME_RIGHT] |= WM8904_DAC_VU; 2442 reg_cache[WM8904_DAC_DIGITAL_VOLUME_RIGHT] |= WM8904_DAC_VU;
2444 wm8904->reg_cache[WM8904_ANALOGUE_OUT1_LEFT] |= WM8904_HPOUT_VU | 2443 reg_cache[WM8904_ANALOGUE_OUT1_LEFT] |= WM8904_HPOUT_VU |
2445 WM8904_HPOUTLZC; 2444 WM8904_HPOUTLZC;
2446 wm8904->reg_cache[WM8904_ANALOGUE_OUT1_RIGHT] |= WM8904_HPOUT_VU | 2445 reg_cache[WM8904_ANALOGUE_OUT1_RIGHT] |= WM8904_HPOUT_VU |
2447 WM8904_HPOUTRZC; 2446 WM8904_HPOUTRZC;
2448 wm8904->reg_cache[WM8904_ANALOGUE_OUT2_LEFT] |= WM8904_LINEOUT_VU | 2447 reg_cache[WM8904_ANALOGUE_OUT2_LEFT] |= WM8904_LINEOUT_VU |
2449 WM8904_LINEOUTLZC; 2448 WM8904_LINEOUTLZC;
2450 wm8904->reg_cache[WM8904_ANALOGUE_OUT2_RIGHT] |= WM8904_LINEOUT_VU | 2449 reg_cache[WM8904_ANALOGUE_OUT2_RIGHT] |= WM8904_LINEOUT_VU |
2451 WM8904_LINEOUTRZC; 2450 WM8904_LINEOUTRZC;
2452 wm8904->reg_cache[WM8904_CLOCK_RATES_0] &= ~WM8904_SR_MODE; 2451 reg_cache[WM8904_CLOCK_RATES_0] &= ~WM8904_SR_MODE;
2453 2452
2454 /* Apply configuration from the platform data. */ 2453 /* Apply configuration from the platform data. */
2455 if (wm8904->pdata) { 2454 if (wm8904->pdata) {
@@ -2457,23 +2456,23 @@ static int wm8904_probe(struct snd_soc_codec *codec)
2457 if (!pdata->gpio_cfg[i]) 2456 if (!pdata->gpio_cfg[i])
2458 continue; 2457 continue;
2459 2458
2460 wm8904->reg_cache[WM8904_GPIO_CONTROL_1 + i] 2459 reg_cache[WM8904_GPIO_CONTROL_1 + i]
2461 = pdata->gpio_cfg[i] & 0xffff; 2460 = pdata->gpio_cfg[i] & 0xffff;
2462 } 2461 }
2463 2462
2464 /* Zero is the default value for these anyway */ 2463 /* Zero is the default value for these anyway */
2465 for (i = 0; i < WM8904_MIC_REGS; i++) 2464 for (i = 0; i < WM8904_MIC_REGS; i++)
2466 wm8904->reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i] 2465 reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i]
2467 = pdata->mic_cfg[i]; 2466 = pdata->mic_cfg[i];
2468 } 2467 }
2469 2468
2470 /* Set Class W by default - this will be managed by the Class 2469 /* Set Class W by default - this will be managed by the Class
2471 * G widget at runtime where bypass paths are available. 2470 * G widget at runtime where bypass paths are available.
2472 */ 2471 */
2473 wm8904->reg_cache[WM8904_CLASS_W_0] |= WM8904_CP_DYN_PWR; 2472 reg_cache[WM8904_CLASS_W_0] |= WM8904_CP_DYN_PWR;
2474 2473
2475 /* Use normal bias source */ 2474 /* Use normal bias source */
2476 wm8904->reg_cache[WM8904_BIAS_CONTROL_0] &= ~WM8904_POBCTRL; 2475 reg_cache[WM8904_BIAS_CONTROL_0] &= ~WM8904_POBCTRL;
2477 2476
2478 wm8904_set_bias_level(codec, SND_SOC_BIAS_STANDBY); 2477 wm8904_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
2479 2478
diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c
index 2cb16f895c46..23086e2c976a 100644
--- a/sound/soc/codecs/wm8940.c
+++ b/sound/soc/codecs/wm8940.c
@@ -768,6 +768,7 @@ static __devinit int wm8940_i2c_probe(struct i2c_client *i2c,
768 768
769 i2c_set_clientdata(i2c, wm8940); 769 i2c_set_clientdata(i2c, wm8940);
770 wm8940->control_data = i2c; 770 wm8940->control_data = i2c;
771 wm8940->control_type = SND_SOC_I2C;
771 772
772 ret = snd_soc_register_codec(&i2c->dev, 773 ret = snd_soc_register_codec(&i2c->dev,
773 &soc_codec_dev_wm8940, &wm8940_dai, 1); 774 &soc_codec_dev_wm8940, &wm8940_dai, 1);
diff --git a/sound/soc/codecs/wm8955.c b/sound/soc/codecs/wm8955.c
index 9cbab8e1de01..2ac35b0be86a 100644
--- a/sound/soc/codecs/wm8955.c
+++ b/sound/soc/codecs/wm8955.c
@@ -42,8 +42,6 @@ static const char *wm8955_supply_names[WM8955_NUM_SUPPLIES] = {
42struct wm8955_priv { 42struct wm8955_priv {
43 enum snd_soc_control_type control_type; 43 enum snd_soc_control_type control_type;
44 44
45 u16 reg_cache[WM8955_MAX_REGISTER + 1];
46
47 unsigned int mclk_rate; 45 unsigned int mclk_rate;
48 46
49 int deemph; 47 int deemph;
@@ -768,6 +766,7 @@ static int wm8955_set_bias_level(struct snd_soc_codec *codec,
768 enum snd_soc_bias_level level) 766 enum snd_soc_bias_level level)
769{ 767{
770 struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec); 768 struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
769 u16 *reg_cache = codec->reg_cache;
771 int ret, i; 770 int ret, i;
772 771
773 switch (level) { 772 switch (level) {
@@ -800,14 +799,14 @@ static int wm8955_set_bias_level(struct snd_soc_codec *codec,
800 /* Sync back cached values if they're 799 /* Sync back cached values if they're
801 * different from the hardware default. 800 * different from the hardware default.
802 */ 801 */
803 for (i = 0; i < ARRAY_SIZE(wm8955->reg_cache); i++) { 802 for (i = 0; i < codec->driver->reg_cache_size; i++) {
804 if (i == WM8955_RESET) 803 if (i == WM8955_RESET)
805 continue; 804 continue;
806 805
807 if (wm8955->reg_cache[i] == wm8955_reg[i]) 806 if (reg_cache[i] == wm8955_reg[i])
808 continue; 807 continue;
809 808
810 snd_soc_write(codec, i, wm8955->reg_cache[i]); 809 snd_soc_write(codec, i, reg_cache[i]);
811 } 810 }
812 811
813 /* Enable VREF and VMID */ 812 /* Enable VREF and VMID */
@@ -902,6 +901,7 @@ static int wm8955_probe(struct snd_soc_codec *codec)
902{ 901{
903 struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec); 902 struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
904 struct wm8955_pdata *pdata = dev_get_platdata(codec->dev); 903 struct wm8955_pdata *pdata = dev_get_platdata(codec->dev);
904 u16 *reg_cache = codec->reg_cache;
905 int ret, i; 905 int ret, i;
906 906
907 ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8955->control_type); 907 ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8955->control_type);
@@ -934,25 +934,25 @@ static int wm8955_probe(struct snd_soc_codec *codec)
934 } 934 }
935 935
936 /* Change some default settings - latch VU and enable ZC */ 936 /* Change some default settings - latch VU and enable ZC */
937 wm8955->reg_cache[WM8955_LEFT_DAC_VOLUME] |= WM8955_LDVU; 937 reg_cache[WM8955_LEFT_DAC_VOLUME] |= WM8955_LDVU;
938 wm8955->reg_cache[WM8955_RIGHT_DAC_VOLUME] |= WM8955_RDVU; 938 reg_cache[WM8955_RIGHT_DAC_VOLUME] |= WM8955_RDVU;
939 wm8955->reg_cache[WM8955_LOUT1_VOLUME] |= WM8955_LO1VU | WM8955_LO1ZC; 939 reg_cache[WM8955_LOUT1_VOLUME] |= WM8955_LO1VU | WM8955_LO1ZC;
940 wm8955->reg_cache[WM8955_ROUT1_VOLUME] |= WM8955_RO1VU | WM8955_RO1ZC; 940 reg_cache[WM8955_ROUT1_VOLUME] |= WM8955_RO1VU | WM8955_RO1ZC;
941 wm8955->reg_cache[WM8955_LOUT2_VOLUME] |= WM8955_LO2VU | WM8955_LO2ZC; 941 reg_cache[WM8955_LOUT2_VOLUME] |= WM8955_LO2VU | WM8955_LO2ZC;
942 wm8955->reg_cache[WM8955_ROUT2_VOLUME] |= WM8955_RO2VU | WM8955_RO2ZC; 942 reg_cache[WM8955_ROUT2_VOLUME] |= WM8955_RO2VU | WM8955_RO2ZC;
943 wm8955->reg_cache[WM8955_MONOOUT_VOLUME] |= WM8955_MOZC; 943 reg_cache[WM8955_MONOOUT_VOLUME] |= WM8955_MOZC;
944 944
945 /* Also enable adaptive bass boost by default */ 945 /* Also enable adaptive bass boost by default */
946 wm8955->reg_cache[WM8955_BASS_CONTROL] |= WM8955_BB; 946 reg_cache[WM8955_BASS_CONTROL] |= WM8955_BB;
947 947
948 /* Set platform data values */ 948 /* Set platform data values */
949 if (pdata) { 949 if (pdata) {
950 if (pdata->out2_speaker) 950 if (pdata->out2_speaker)
951 wm8955->reg_cache[WM8955_ADDITIONAL_CONTROL_2] 951 reg_cache[WM8955_ADDITIONAL_CONTROL_2]
952 |= WM8955_ROUT2INV; 952 |= WM8955_ROUT2INV;
953 953
954 if (pdata->monoin_diff) 954 if (pdata->monoin_diff)
955 wm8955->reg_cache[WM8955_MONO_OUT_MIX_1] 955 reg_cache[WM8955_MONO_OUT_MIX_1]
956 |= WM8955_DMEN; 956 |= WM8955_DMEN;
957 } 957 }
958 958
@@ -1003,6 +1003,7 @@ static __devinit int wm8955_i2c_probe(struct i2c_client *i2c,
1003 return -ENOMEM; 1003 return -ENOMEM;
1004 1004
1005 i2c_set_clientdata(i2c, wm8955); 1005 i2c_set_clientdata(i2c, wm8955);
1006 wm8955->control_type = SND_SOC_I2C;
1006 1007
1007 ret = snd_soc_register_codec(&i2c->dev, 1008 ret = snd_soc_register_codec(&i2c->dev,
1008 &soc_codec_dev_wm8955, &wm8955_dai, 1); 1009 &soc_codec_dev_wm8955, &wm8955_dai, 1);
diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c
index 21986c42272f..ff6ff2f529d2 100644
--- a/sound/soc/codecs/wm8960.c
+++ b/sound/soc/codecs/wm8960.c
@@ -1013,6 +1013,7 @@ static __devinit int wm8960_i2c_probe(struct i2c_client *i2c,
1013 return -ENOMEM; 1013 return -ENOMEM;
1014 1014
1015 i2c_set_clientdata(i2c, wm8960); 1015 i2c_set_clientdata(i2c, wm8960);
1016 wm8960->control_type = SND_SOC_I2C;
1016 wm8960->control_data = i2c; 1017 wm8960->control_data = i2c;
1017 1018
1018 ret = snd_soc_register_codec(&i2c->dev, 1019 ret = snd_soc_register_codec(&i2c->dev,
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 1304ca91a11c..7c421cc837bd 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -52,8 +52,6 @@ static const char *wm8962_supply_names[WM8962_NUM_SUPPLIES] = {
52struct wm8962_priv { 52struct wm8962_priv {
53 struct snd_soc_codec *codec; 53 struct snd_soc_codec *codec;
54 54
55 u16 reg_cache[WM8962_MAX_REGISTER + 1];
56
57 int sysclk; 55 int sysclk;
58 int sysclk_rate; 56 int sysclk_rate;
59 57
@@ -1991,8 +1989,7 @@ static int wm8962_put_hp_sw(struct snd_kcontrol *kcontrol,
1991 struct snd_ctl_elem_value *ucontrol) 1989 struct snd_ctl_elem_value *ucontrol)
1992{ 1990{
1993 struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); 1991 struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
1994 struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); 1992 u16 *reg_cache = codec->reg_cache;
1995 u16 *reg_cache = wm8962->reg_cache;
1996 int ret; 1993 int ret;
1997 1994
1998 /* Apply the update (if any) */ 1995 /* Apply the update (if any) */
@@ -2020,8 +2017,7 @@ static int wm8962_put_spk_sw(struct snd_kcontrol *kcontrol,
2020 struct snd_ctl_elem_value *ucontrol) 2017 struct snd_ctl_elem_value *ucontrol)
2021{ 2018{
2022 struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); 2019 struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
2023 struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); 2020 u16 *reg_cache = codec->reg_cache;
2024 u16 *reg_cache = wm8962->reg_cache;
2025 int ret; 2021 int ret;
2026 2022
2027 /* Apply the update (if any) */ 2023 /* Apply the update (if any) */
@@ -2329,8 +2325,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
2329 struct snd_kcontrol *kcontrol, int event) 2325 struct snd_kcontrol *kcontrol, int event)
2330{ 2326{
2331 struct snd_soc_codec *codec = w->codec; 2327 struct snd_soc_codec *codec = w->codec;
2332 struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); 2328 u16 *reg_cache = codec->reg_cache;
2333 u16 *reg_cache = wm8962->reg_cache;
2334 int reg; 2329 int reg;
2335 2330
2336 switch (w->shift) { 2331 switch (w->shift) {
@@ -2719,7 +2714,7 @@ static int wm8962_add_widgets(struct snd_soc_codec *codec)
2719 2714
2720static void wm8962_sync_cache(struct snd_soc_codec *codec) 2715static void wm8962_sync_cache(struct snd_soc_codec *codec)
2721{ 2716{
2722 struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec); 2717 u16 *reg_cache = codec->reg_cache;
2723 int i; 2718 int i;
2724 2719
2725 if (!codec->cache_sync) 2720 if (!codec->cache_sync)
@@ -2732,13 +2727,13 @@ static void wm8962_sync_cache(struct snd_soc_codec *codec)
2732 /* Sync back cached values if they're different from the 2727 /* Sync back cached values if they're different from the
2733 * hardware default. 2728 * hardware default.
2734 */ 2729 */
2735 for (i = 1; i < ARRAY_SIZE(wm8962->reg_cache); i++) { 2730 for (i = 1; i < codec->driver->reg_cache_size; i++) {
2736 if (i == WM8962_SOFTWARE_RESET) 2731 if (i == WM8962_SOFTWARE_RESET)
2737 continue; 2732 continue;
2738 if (wm8962->reg_cache[i] == wm8962_reg[i]) 2733 if (reg_cache[i] == wm8962_reg[i])
2739 continue; 2734 continue;
2740 2735
2741 snd_soc_write(codec, i, wm8962->reg_cache[i]); 2736 snd_soc_write(codec, i, reg_cache[i]);
2742 } 2737 }
2743 2738
2744 codec->cache_sync = 0; 2739 codec->cache_sync = 0;
@@ -3406,12 +3401,11 @@ EXPORT_SYMBOL_GPL(wm8962_mic_detect);
3406#ifdef CONFIG_PM 3401#ifdef CONFIG_PM
3407static int wm8962_resume(struct snd_soc_codec *codec) 3402static int wm8962_resume(struct snd_soc_codec *codec)
3408{ 3403{
3409 struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
3410 u16 *reg_cache = codec->reg_cache; 3404 u16 *reg_cache = codec->reg_cache;
3411 int i; 3405 int i;
3412 3406
3413 /* Restore the registers */ 3407 /* Restore the registers */
3414 for (i = 1; i < ARRAY_SIZE(wm8962->reg_cache); i++) { 3408 for (i = 1; i < codec->driver->reg_cache_size; i++) {
3415 switch (i) { 3409 switch (i) {
3416 case WM8962_SOFTWARE_RESET: 3410 case WM8962_SOFTWARE_RESET:
3417 continue; 3411 continue;
@@ -3705,6 +3699,7 @@ static int wm8962_probe(struct snd_soc_codec *codec)
3705 struct wm8962_pdata *pdata = dev_get_platdata(codec->dev); 3699 struct wm8962_pdata *pdata = dev_get_platdata(codec->dev);
3706 struct i2c_client *i2c = container_of(codec->dev, struct i2c_client, 3700 struct i2c_client *i2c = container_of(codec->dev, struct i2c_client,
3707 dev); 3701 dev);
3702 u16 *reg_cache = codec->reg_cache;
3708 int i, trigger, irq_pol; 3703 int i, trigger, irq_pol;
3709 3704
3710 wm8962->codec = codec; 3705 wm8962->codec = codec;
@@ -3804,7 +3799,7 @@ static int wm8962_probe(struct snd_soc_codec *codec)
3804 3799
3805 /* Put the speakers into mono mode? */ 3800 /* Put the speakers into mono mode? */
3806 if (pdata->spk_mono) 3801 if (pdata->spk_mono)
3807 wm8962->reg_cache[WM8962_CLASS_D_CONTROL_2] 3802 reg_cache[WM8962_CLASS_D_CONTROL_2]
3808 |= WM8962_SPK_MONO; 3803 |= WM8962_SPK_MONO;
3809 3804
3810 /* Micbias setup, detection enable and detection 3805 /* Micbias setup, detection enable and detection
@@ -3819,16 +3814,16 @@ static int wm8962_probe(struct snd_soc_codec *codec)
3819 } 3814 }
3820 3815
3821 /* Latch volume update bits */ 3816 /* Latch volume update bits */
3822 wm8962->reg_cache[WM8962_LEFT_INPUT_VOLUME] |= WM8962_IN_VU; 3817 reg_cache[WM8962_LEFT_INPUT_VOLUME] |= WM8962_IN_VU;
3823 wm8962->reg_cache[WM8962_RIGHT_INPUT_VOLUME] |= WM8962_IN_VU; 3818 reg_cache[WM8962_RIGHT_INPUT_VOLUME] |= WM8962_IN_VU;
3824 wm8962->reg_cache[WM8962_LEFT_ADC_VOLUME] |= WM8962_ADC_VU; 3819 reg_cache[WM8962_LEFT_ADC_VOLUME] |= WM8962_ADC_VU;
3825 wm8962->reg_cache[WM8962_RIGHT_ADC_VOLUME] |= WM8962_ADC_VU; 3820 reg_cache[WM8962_RIGHT_ADC_VOLUME] |= WM8962_ADC_VU;
3826 wm8962->reg_cache[WM8962_LEFT_DAC_VOLUME] |= WM8962_DAC_VU; 3821 reg_cache[WM8962_LEFT_DAC_VOLUME] |= WM8962_DAC_VU;
3827 wm8962->reg_cache[WM8962_RIGHT_DAC_VOLUME] |= WM8962_DAC_VU; 3822 reg_cache[WM8962_RIGHT_DAC_VOLUME] |= WM8962_DAC_VU;
3828 wm8962->reg_cache[WM8962_SPKOUTL_VOLUME] |= WM8962_SPKOUT_VU; 3823 reg_cache[WM8962_SPKOUTL_VOLUME] |= WM8962_SPKOUT_VU;
3829 wm8962->reg_cache[WM8962_SPKOUTR_VOLUME] |= WM8962_SPKOUT_VU; 3824 reg_cache[WM8962_SPKOUTR_VOLUME] |= WM8962_SPKOUT_VU;
3830 wm8962->reg_cache[WM8962_HPOUTL_VOLUME] |= WM8962_HPOUT_VU; 3825 reg_cache[WM8962_HPOUTL_VOLUME] |= WM8962_HPOUT_VU;
3831 wm8962->reg_cache[WM8962_HPOUTR_VOLUME] |= WM8962_HPOUT_VU; 3826 reg_cache[WM8962_HPOUTR_VOLUME] |= WM8962_HPOUT_VU;
3832 3827
3833 wm8962_add_widgets(codec); 3828 wm8962_add_widgets(codec);
3834 3829
diff --git a/sound/soc/codecs/wm8971.c b/sound/soc/codecs/wm8971.c
index 63f6dbf5d070..9f18db6e167c 100644
--- a/sound/soc/codecs/wm8971.c
+++ b/sound/soc/codecs/wm8971.c
@@ -718,6 +718,7 @@ static __devinit int wm8971_i2c_probe(struct i2c_client *i2c,
718 if (wm8971 == NULL) 718 if (wm8971 == NULL)
719 return -ENOMEM; 719 return -ENOMEM;
720 720
721 wm8971->control_type = SND_SOC_I2C;
721 i2c_set_clientdata(i2c, wm8971); 722 i2c_set_clientdata(i2c, wm8971);
722 723
723 ret = snd_soc_register_codec(&i2c->dev, 724 ret = snd_soc_register_codec(&i2c->dev,
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index ecc7c37180c7..a486670966bd 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c
@@ -1335,6 +1335,7 @@ static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
1335 return -ENOMEM; 1335 return -ENOMEM;
1336 1336
1337 i2c_set_clientdata(i2c, wm9081); 1337 i2c_set_clientdata(i2c, wm9081);
1338 wm9081->control_type = SND_SOC_I2C;
1338 wm9081->control_data = i2c; 1339 wm9081->control_data = i2c;
1339 1340
1340 ret = snd_soc_register_codec(&i2c->dev, 1341 ret = snd_soc_register_codec(&i2c->dev,
diff --git a/sound/soc/codecs/wm9090.c b/sound/soc/codecs/wm9090.c
index 99c046ba46bb..6e5f64f627cb 100644
--- a/sound/soc/codecs/wm9090.c
+++ b/sound/soc/codecs/wm9090.c
@@ -141,7 +141,6 @@ static const u16 wm9090_reg_defaults[] = {
141/* This struct is used to save the context */ 141/* This struct is used to save the context */
142struct wm9090_priv { 142struct wm9090_priv {
143 struct mutex mutex; 143 struct mutex mutex;
144 u16 reg_cache[WM9090_MAX_REGISTER + 1];
145 struct wm9090_platform_data pdata; 144 struct wm9090_platform_data pdata;
146 void *control_data; 145 void *control_data;
147}; 146};
@@ -552,6 +551,7 @@ static int wm9090_set_bias_level(struct snd_soc_codec *codec,
552static int wm9090_probe(struct snd_soc_codec *codec) 551static int wm9090_probe(struct snd_soc_codec *codec)
553{ 552{
554 struct wm9090_priv *wm9090 = snd_soc_codec_get_drvdata(codec); 553 struct wm9090_priv *wm9090 = snd_soc_codec_get_drvdata(codec);
554 u16 *reg_cache = codec->reg_cache;
555 int ret; 555 int ret;
556 556
557 codec->control_data = wm9090->control_data; 557 codec->control_data = wm9090->control_data;
@@ -576,22 +576,22 @@ static int wm9090_probe(struct snd_soc_codec *codec)
576 /* Configure some defaults; they will be written out when we 576 /* Configure some defaults; they will be written out when we
577 * bring the bias up. 577 * bring the bias up.
578 */ 578 */
579 wm9090->reg_cache[WM9090_IN1_LINE_INPUT_A_VOLUME] |= WM9090_IN1_VU 579 reg_cache[WM9090_IN1_LINE_INPUT_A_VOLUME] |= WM9090_IN1_VU
580 | WM9090_IN1A_ZC; 580 | WM9090_IN1A_ZC;
581 wm9090->reg_cache[WM9090_IN1_LINE_INPUT_B_VOLUME] |= WM9090_IN1_VU 581 reg_cache[WM9090_IN1_LINE_INPUT_B_VOLUME] |= WM9090_IN1_VU
582 | WM9090_IN1B_ZC; 582 | WM9090_IN1B_ZC;
583 wm9090->reg_cache[WM9090_IN2_LINE_INPUT_A_VOLUME] |= WM9090_IN2_VU 583 reg_cache[WM9090_IN2_LINE_INPUT_A_VOLUME] |= WM9090_IN2_VU
584 | WM9090_IN2A_ZC; 584 | WM9090_IN2A_ZC;
585 wm9090->reg_cache[WM9090_IN2_LINE_INPUT_B_VOLUME] |= WM9090_IN2_VU 585 reg_cache[WM9090_IN2_LINE_INPUT_B_VOLUME] |= WM9090_IN2_VU
586 | WM9090_IN2B_ZC; 586 | WM9090_IN2B_ZC;
587 wm9090->reg_cache[WM9090_SPEAKER_VOLUME_LEFT] |= 587 reg_cache[WM9090_SPEAKER_VOLUME_LEFT] |=
588 WM9090_SPKOUT_VU | WM9090_SPKOUTL_ZC; 588 WM9090_SPKOUT_VU | WM9090_SPKOUTL_ZC;
589 wm9090->reg_cache[WM9090_LEFT_OUTPUT_VOLUME] |= 589 reg_cache[WM9090_LEFT_OUTPUT_VOLUME] |=
590 WM9090_HPOUT1_VU | WM9090_HPOUT1L_ZC; 590 WM9090_HPOUT1_VU | WM9090_HPOUT1L_ZC;
591 wm9090->reg_cache[WM9090_RIGHT_OUTPUT_VOLUME] |= 591 reg_cache[WM9090_RIGHT_OUTPUT_VOLUME] |=
592 WM9090_HPOUT1_VU | WM9090_HPOUT1R_ZC; 592 WM9090_HPOUT1_VU | WM9090_HPOUT1R_ZC;
593 593
594 wm9090->reg_cache[WM9090_CLOCKING_1] |= WM9090_TOCLK_ENA; 594 reg_cache[WM9090_CLOCKING_1] |= WM9090_TOCLK_ENA;
595 595
596 wm9090_set_bias_level(codec, SND_SOC_BIAS_STANDBY); 596 wm9090_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
597 597
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index b2c63309a651..6f5a498608b2 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -24,12 +24,47 @@ OPTIONS
24--input=:: 24--input=::
25 Input file name. (default: perf.data) 25 Input file name. (default: perf.data)
26 26
27-d::
28--dsos=<dso[,dso...]>::
29 Only consider symbols in these dsos.
30-s::
31--symbol=<symbol>::
32 Symbol to annotate.
33
34-f::
35--force::
36 Don't complain, do it.
37
38-v::
39--verbose::
40 Be more verbose. (Show symbol address, etc)
41
42-D::
43--dump-raw-trace::
44 Dump raw trace in ASCII.
45
46-k::
47--vmlinux=<file>::
48 vmlinux pathname.
49
50-m::
51--modules::
52 Load module symbols. WARNING: use only with -k and LIVE kernel.
53
54-l::
55--print-line::
56 Print matching source lines (may be slow).
57
58-P::
59--full-paths::
60 Don't shorten the displayed pathnames.
61
27--stdio:: Use the stdio interface. 62--stdio:: Use the stdio interface.
28 63
29--tui:: Use the TUI interface Use of --tui requires a tty, if one is not 64--tui:: Use the TUI interface Use of --tui requires a tty, if one is not
30 present, as when piping to other commands, the stdio interface is 65 present, as when piping to other commands, the stdio interface is
31 used. This interfaces starts by centering on the line with more 66 used. This interfaces starts by centering on the line with more
32 samples, TAB/UNTAB cycles thru the lines with more samples. 67 samples, TAB/UNTAB cycles through the lines with more samples.
33 68
34SEE ALSO 69SEE ALSO
35-------- 70--------
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
index 01b642c0bf8f..5eaac6f26d51 100644
--- a/tools/perf/Documentation/perf-buildid-list.txt
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -18,6 +18,9 @@ perf report.
18 18
19OPTIONS 19OPTIONS
20------- 20-------
21-H::
22--with-hits::
23 Show only DSOs with hits.
21-i:: 24-i::
22--input=:: 25--input=::
23 Input file name. (default: perf.data) 26 Input file name. (default: perf.data)
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 20d97d84ea1c..74d7481ed7a6 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data.
19 19
20OPTIONS 20OPTIONS
21------- 21-------
22-M::
23--displacement::
24 Show position displacement relative to baseline.
25
26-D::
27--dump-raw-trace::
28 Dump raw trace in ASCII.
29
30-m::
31--modules::
32 Load module symbols. WARNING: use only with -k and LIVE kernel
33
22-d:: 34-d::
23--dsos=:: 35--dsos=::
24 Only consider symbols in these dsos. CSV that understands 36 Only consider symbols in these dsos. CSV that understands
@@ -42,7 +54,7 @@ OPTIONS
42--field-separator=:: 54--field-separator=::
43 55
44 Use a special separator character and don't pad with spaces, replacing 56 Use a special separator character and don't pad with spaces, replacing
45 all occurances of this separator in symbol names (and other output) 57 all occurrences of this separator in symbol names (and other output)
46 with a '.' character, that thus it's the only non valid separator. 58 with a '.' character, that thus it's the only non valid separator.
47 59
48-v:: 60-v::
@@ -50,6 +62,13 @@ OPTIONS
50 Be verbose, for instance, show the raw counts in addition to the 62 Be verbose, for instance, show the raw counts in addition to the
51 diff. 63 diff.
52 64
65-f::
66--force::
67 Don't complain, do it.
68
69--symfs=<directory>::
70 Look for files with symbols relative to this directory.
71
53SEE ALSO 72SEE ALSO
54-------- 73--------
55linkperf:perf-record[1] 74linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index d004e19fe6d6..dd84cb2f0a88 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -22,7 +22,7 @@ There are a couple of variants of perf kvm:
22 a performance counter profile of guest os in realtime 22 a performance counter profile of guest os in realtime
23 of an arbitrary workload. 23 of an arbitrary workload.
24 24
25 'perf kvm record <command>' to record the performance couinter profile 25 'perf kvm record <command>' to record the performance counter profile
26 of an arbitrary workload and save it into a perf data file. If both 26 of an arbitrary workload and save it into a perf data file. If both
27 --host and --guest are input, the perf data file name is perf.data.kvm. 27 --host and --guest are input, the perf data file name is perf.data.kvm.
28 If there is no --host but --guest, the file name is perf.data.guest. 28 If there is no --host but --guest, the file name is perf.data.guest.
@@ -40,6 +40,12 @@ There are a couple of variants of perf kvm:
40 40
41OPTIONS 41OPTIONS
42------- 42-------
43-i::
44--input=::
45 Input file name.
46-o::
47--output::
48 Output file name.
43--host=:: 49--host=::
44 Collect host side performance profile. 50 Collect host side performance profile.
45--guest=:: 51--guest=::
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index b317102138c8..921de259ea10 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -24,6 +24,21 @@ and statistics with this 'perf lock' command.
24 24
25 'perf lock report' reports statistical data. 25 'perf lock report' reports statistical data.
26 26
27OPTIONS
28-------
29
30-i::
31--input=<file>::
32 Input file name.
33
34-v::
35--verbose::
36 Be more verbose (show symbol address, etc).
37
38-D::
39--dump-raw-trace::
40 Dump raw trace in ASCII.
41
27SEE ALSO 42SEE ALSO
28-------- 43--------
29linkperf:perf[1] 44linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 62de1b7f4e76..86b797a35aa6 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -115,9 +115,9 @@ Each probe argument follows below syntax.
115 115
116LINE SYNTAX 116LINE SYNTAX
117----------- 117-----------
118Line range is descripted by following syntax. 118Line range is described by following syntax.
119 119
120 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]" 120 "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
121 121
122FUNC specifies the function name of showing lines. 'RLN' is the start line 122FUNC specifies the function name of showing lines. 'RLN' is the start line
123number from function entry line, and 'RLN2' is the end line number. As same as 123number from function entry line, and 'RLN2' is the end line number. As same as
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index a91f9f9e6e5c..52462ae26455 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -39,15 +39,24 @@ OPTIONS
39 be passed as follows: '\mem:addr[:[r][w][x]]'. 39 be passed as follows: '\mem:addr[:[r][w][x]]'.
40 If you want to profile read-write accesses in 0x1000, just set 40 If you want to profile read-write accesses in 0x1000, just set
41 'mem:0x1000:rw'. 41 'mem:0x1000:rw'.
42
43--filter=<filter>::
44 Event filter.
45
42-a:: 46-a::
43 System-wide collection. 47--all-cpus::
48 System-wide collection from all CPUs.
44 49
45-l:: 50-l::
46 Scale counter values. 51 Scale counter values.
47 52
48-p:: 53-p::
49--pid=:: 54--pid=::
50 Record events on existing pid. 55 Record events on existing process ID.
56
57-t::
58--tid=::
59 Record events on existing thread ID.
51 60
52-r:: 61-r::
53--realtime=:: 62--realtime=::
@@ -99,6 +108,11 @@ OPTIONS
99--data:: 108--data::
100 Sample addresses. 109 Sample addresses.
101 110
111-T::
112--timestamp::
113 Sample timestamps. Use it with 'perf report -D' to see the timestamps,
114 for instance.
115
102-n:: 116-n::
103--no-samples:: 117--no-samples::
104 Don't sample. 118 Don't sample.
@@ -109,8 +123,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun
109 123
110-C:: 124-C::
111--cpu:: 125--cpu::
112Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a 126Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
113comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. 127comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
114In per-thread mode with inheritance mode on (default), samples are captured only when 128In per-thread mode with inheritance mode on (default), samples are captured only when
115the thread executes on the designated CPUs. Default is to monitor all CPUs. 129the thread executes on the designated CPUs. Default is to monitor all CPUs.
116 130
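Editor's note: an illustrative invocation of the -C option documented above, sampling system-wide on CPUs 0 and 2-3 for five seconds:

    perf record -C 0,2-3 -a sleep 5
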
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 12052c9ed0ba..8ba03d6e5398 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -20,6 +20,11 @@ OPTIONS
20-i:: 20-i::
21--input=:: 21--input=::
22 Input file name. (default: perf.data) 22 Input file name. (default: perf.data)
23
24-v::
25--verbose::
26 Be more verbose. (show symbol address, etc)
27
23-d:: 28-d::
24--dsos=:: 29--dsos=::
25 Only consider symbols in these dsos. CSV that understands 30 Only consider symbols in these dsos. CSV that understands
@@ -27,6 +32,10 @@ OPTIONS
27-n:: 32-n::
28--show-nr-samples:: 33--show-nr-samples::
29 Show the number of samples for each symbol 34 Show the number of samples for each symbol
35
36--showcpuutilization::
37 Show sample percentage for different cpu modes.
38
30-T:: 39-T::
31--threads:: 40--threads::
32 Show per-thread event counters 41 Show per-thread event counters
@@ -39,12 +48,24 @@ OPTIONS
39 Only consider these symbols. CSV that understands 48 Only consider these symbols. CSV that understands
40 file://filename entries. 49 file://filename entries.
41 50
51-U::
52--hide-unresolved::
53 Only display entries resolved to a symbol.
54
42-s:: 55-s::
43--sort=:: 56--sort=::
44 Sort by key(s): pid, comm, dso, symbol, parent. 57 Sort by key(s): pid, comm, dso, symbol, parent.
45 58
59-p::
60--parent=<regex>::
61 regex filter to identify parent, see: '--sort parent'
62
63-x::
64--exclude-other::
65 Only display entries with parent-match.
66
46-w:: 67-w::
47--field-width=:: 68--column-widths=<width[,width...]>::
48 Force each column width to the provided list, for large terminal 69 Force each column width to the provided list, for large terminal
49 readability. 70 readability.
50 71
@@ -52,19 +73,26 @@ OPTIONS
52--field-separator=:: 73--field-separator=::
53 74
54 Use a special separator character and don't pad with spaces, replacing 75 Use a special separator character and don't pad with spaces, replacing
55 all occurances of this separator in symbol names (and other output) 76 all occurrences of this separator in symbol names (and other output)
56 with a '.' character, that thus it's the only non valid separator. 77 with a '.' character, that thus it's the only non valid separator.
57 78
79-D::
80--dump-raw-trace::
81 Dump raw trace in ASCII.
82
58-g [type,min]:: 83-g [type,min]::
59--call-graph:: 84--call-graph::
60 Display callchains using type and min percent threshold. 85 Display call chains using type and min percent threshold.
61 type can be either: 86 type can be either:
62 - flat: single column, linear exposure of callchains. 87 - flat: single column, linear exposure of call chains.
63 - graph: use a graph tree, displaying absolute overhead rates. 88 - graph: use a graph tree, displaying absolute overhead rates.
64 - fractal: like graph, but displays relative rates. Each branch of 89 - fractal: like graph, but displays relative rates. Each branch of
65 the tree is considered as a new profiled object. + 90 the tree is considered as a new profiled object. +
66 Default: fractal,0.5. 91 Default: fractal,0.5.
67 92
93--pretty=<key>::
94 Pretty printing style. key: normal, raw
95
68--stdio:: Use the stdio interface. 96--stdio:: Use the stdio interface.
69 97
70--tui:: Use the TUI interface, that is integrated with annotate and allows 98--tui:: Use the TUI interface, that is integrated with annotate and allows
@@ -72,6 +100,25 @@ OPTIONS
72 requires a tty, if one is not present, as when piping to other 100 requires a tty, if one is not present, as when piping to other
73 commands, the stdio interface is used. 101 commands, the stdio interface is used.
74 102
103-k::
104--vmlinux=<file>::
105 vmlinux pathname
106
107--kallsyms=<file>::
108 kallsyms pathname
109
110-m::
111--modules::
112 Load module symbols. WARNING: This should only be used with -k and
113 a LIVE kernel.
114
115-f::
116--force::
117 Don't complain, do it.
118
119--symfs=<directory>::
120 Look for files with symbols relative to this directory.
121
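Editor's note: an illustrative use of the -g option documented above, rendering a call-graph report on the terminal with the graph type and a 2% threshold:

    perf report -g graph,2.0 --stdio
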
75SEE ALSO 122SEE ALSO
76-------- 123--------
77linkperf:perf-stat[1] 124linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 8417644a6166..46822d5fde1c 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf sched' {record|latency|replay|trace} 11'perf sched' {record|latency|map|replay|trace}
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15There are four variants of perf sched: 15There are five variants of perf sched:
16 16
17 'perf sched record <command>' to record the scheduling events 17 'perf sched record <command>' to record the scheduling events
18 of an arbitrary workload. 18 of an arbitrary workload.
@@ -30,8 +30,22 @@ There are four variants of perf sched:
30 of the workload as it occurred when it was recorded - and can repeat 30 of the workload as it occurred when it was recorded - and can repeat
31 it a number of times, measuring its performance.) 31 it a number of times, measuring its performance.)
32 32
33 'perf sched map' to print a textual context-switching outline of
34 workload captured via perf sched record. Columns stand for
35 individual CPUs, and the two-letter shortcuts stand for tasks that
36 are running on a CPU. A '*' denotes the CPU that had the event, and
37 a dot signals an idle CPU.
38
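Editor's note: an illustrative 'perf sched map' session as described above: record any workload, then render the per-CPU context-switch map from the resulting perf.data:

    perf sched record sleep 1
    perf sched map
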
33OPTIONS 39OPTIONS
34------- 40-------
41-i::
42--input=<file>::
43 Input file name. (default: perf.data)
44
45-v::
46--verbose::
47 Be more verbose. (show symbol address, etc)
48
35-D:: 49-D::
36--dump-raw-trace=:: 50--dump-raw-trace=::
37 Display verbose dump of the sched data. 51 Display verbose dump of the sched data.
diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index ee6525ee6d69..5bb41e55a3ac 100644
--- a/tools/perf/Documentation/perf-trace-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -1,19 +1,19 @@
1perf-trace-perl(1) 1perf-script-perl(1)
2================== 2===================
3 3
4NAME 4NAME
5---- 5----
6perf-trace-perl - Process trace data with a Perl script 6perf-script-perl - Process trace data with a Perl script
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf trace' [-s [Perl]:script[.pl] ] 11'perf script' [-s [Perl]:script[.pl] ]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15 15
16This perf trace option is used to process perf trace data using perf's 16This perf script option is used to process perf script data using perf's
 built-in Perl interpreter. It reads and processes the input file and
 displays the results of the trace analysis implemented in the given
 Perl script, if any.
@@ -21,7 +21,7 @@ Perl script, if any.
 STARTER SCRIPTS
 ---------------
 
-You can avoid reading the rest of this document by running 'perf trace
+You can avoid reading the rest of this document by running 'perf script
 -g perl' in the same directory as an existing perf.data trace file.
 That will generate a starter script containing a handler for each of
 the event types in the trace file; it simply prints every available
@@ -30,13 +30,13 @@ field for each event in the trace file.
 You can also look at the existing scripts in
 ~/libexec/perf-core/scripts/perl for typical examples showing how to
 do basic things like aggregate event data, print results, etc. Also,
-the check-perf-trace.pl script, while not interesting for its results,
+the check-perf-script.pl script, while not interesting for its results,
 attempts to exercise all of the main scripting features.
 
 EVENT HANDLERS
 --------------
 
-When perf trace is invoked using a trace script, a user-defined
+When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace. If there's
 no handler function defined for a given event type, the event is
 ignored (or passed to a 'trace_handled' function, see below) and the
@@ -112,13 +112,13 @@ write a useful trace script. The sections below cover the rest.
 SCRIPT LAYOUT
 -------------
 
-Every perf trace Perl script should start by setting up a Perl module
+Every perf script Perl script should start by setting up a Perl module
 search path and 'use'ing a few support modules (see module
 descriptions below):
 
 ----
- use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
- use lib "./Perf-Trace-Util/lib";
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib";
+ use lib "./perf-script-Util/lib";
  use Perf::Trace::Core;
  use Perf::Trace::Context;
  use Perf::Trace::Util;
@@ -162,7 +162,7 @@ sub trace_unhandled
 ----
 
 The remaining sections provide descriptions of each of the available
-built-in perf trace Perl modules and their associated functions.
+built-in perf script Perl modules and their associated functions.
 
 AVAILABLE MODULES AND FUNCTIONS
 -------------------------------
@@ -170,7 +170,7 @@ AVAILABLE MODULES AND FUNCTIONS
 The following sections describe the functions and variables available
 via the various Perf::Trace::* Perl modules. To use the functions and
 variables from the given module, add the corresponding 'use
-Perf::Trace::XXX' line to your perf trace script.
+Perf::Trace::XXX' line to your perf script script.
 
 Perf::Trace::Core Module
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -204,7 +204,7 @@ argument.
 Perf::Trace::Util Module
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-Various utility functions for use with perf trace:
+Various utility functions for use with perf script:
 
  nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
  nsecs_secs($nsecs) - returns whole secs portion given nsecs
@@ -214,4 +214,4 @@ Various utility functions for use with perf trace:
 
 SEE ALSO
 --------
-linkperf:perf-trace[1]
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 693be804dd3d..36b38277422c 100644
--- a/tools/perf/Documentation/perf-trace-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -1,19 +1,19 @@
-perf-trace-python(1)
+perf-script-python(1)
 ====================
 
 NAME
 ----
-perf-trace-python - Process trace data with a Python script
+perf-script-python - Process trace data with a Python script
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [-s [Python]:script[.py] ]
+'perf script' [-s [Python]:script[.py] ]
 
 DESCRIPTION
 -----------
 
-This perf trace option is used to process perf trace data using perf's
+This perf script option is used to process perf script data using perf's
 built-in Python interpreter. It reads and processes the input file and
 displays the results of the trace analysis implemented in the given
 Python script, if any.
@@ -23,15 +23,15 @@ A QUICK EXAMPLE
 
 This section shows the process, start to finish, of creating a working
 Python script that aggregates and extracts useful information from a
-raw perf trace stream. You can avoid reading the rest of this
+raw perf script stream. You can avoid reading the rest of this
 document if an example is enough for you; the rest of the document
 provides more details on each step and lists the library functions
 available to script writers.
 
 This example actually details the steps that were used to create the
-'syscall-counts' script you see when you list the available perf trace
-scripts via 'perf trace -l'. As such, this script also shows how to
-integrate your script into the list of general-purpose 'perf trace'
+'syscall-counts' script you see when you list the available perf script
+scripts via 'perf script -l'. As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf script'
 scripts listed by that command.
 
 The syscall-counts script is a simple script, but demonstrates all the
@@ -105,31 +105,31 @@ That single stream will be recorded in a file in the current directory
 called perf.data.
 
 Once we have a perf.data file containing our data, we can use the -g
-'perf trace' option to generate a Python script that will contain a
+'perf script' option to generate a Python script that will contain a
 callback handler for each event type found in the perf.data trace
 stream (for more details, see the STARTER SCRIPTS section).
 
 ----
-# perf trace -g python
-generated Python script: perf-trace.py
+# perf script -g python
+generated Python script: perf-script.py
 
 The output file created also in the current directory is named
-perf-trace.py. Here's the file in its entirety:
+perf-script.py. Here's the file in its entirety:
 
-# perf trace event handlers, generated by perf trace -g python
+# perf script event handlers, generated by perf script -g python
 # Licensed under the terms of the GNU GPL License version 2
 
 # The common_* event handler fields are the most useful fields common to
 # all events. They don't necessarily correspond to the 'common_*' fields
 # in the format files. Those fields not available as handler params can
 # be retrieved using Python functions of the form common_*(context).
-# See the perf-trace-python Documentation for the list of available functions.
+# See the perf-script-python Documentation for the list of available functions.
 
 import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+	'/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -160,7 +160,7 @@ def print_header(event_name, cpu, secs, nsecs, pid, comm):
 ----
 
 At the top is a comment block followed by some import statements and a
-path append which every perf trace script should include.
+path append which every perf script script should include.
 
 Following that are a couple generated functions, trace_begin() and
 trace_end(), which are called at the beginning and the end of the
@@ -189,8 +189,8 @@ simply a utility function used for that purpose. Let's rename the
 script and run it to see the default output:
 
 ----
-# mv perf-trace.py syscall-counts.py
-# perf trace -s syscall-counts.py
+# mv perf-script.py syscall-counts.py
+# perf script -s syscall-counts.py
 
 raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args=
 raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args=
@@ -216,7 +216,7 @@ import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+	'/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -279,7 +279,7 @@ import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+	'/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -315,7 +315,7 @@ def print_syscall_totals():
 
 The script can be run just as before:
 
- # perf trace -s syscall-counts.py
+ # perf script -s syscall-counts.py
 
 So those are the essential steps in writing and running a script. The
 process can be generalized to any tracepoint or set of tracepoints
@@ -324,17 +324,17 @@ interested in by looking at the list of available events shown by
 'perf list' and/or look in /sys/kernel/debug/tracing events for
 detailed event and field info, record the corresponding trace data
 using 'perf record', passing it the list of interesting events,
-generate a skeleton script using 'perf trace -g python' and modify the
+generate a skeleton script using 'perf script -g python' and modify the
 code to aggregate and display it for your particular needs.
 
 After you've done that you may end up with a general-purpose script
 that you want to keep around and have available for future use. By
 writing a couple of very simple shell scripts and putting them in the
 right place, you can have your script listed alongside the other
-scripts listed by the 'perf trace -l' command e.g.:
+scripts listed by the 'perf script -l' command e.g.:
 
 ----
-root@tropicana:~# perf trace -l
+root@tropicana:~# perf script -l
 List of available trace scripts:
  workqueue-stats workqueue stats (ins/exe/create/destroy)
  wakeup-latency system-wide min/max/avg wakeup latency
@@ -365,14 +365,14 @@ perf record -a -e raw_syscalls:sys_enter
 The 'report' script is also a shell script with the same base name as
 your script, but with -report appended. It should also be located in
 the perf/scripts/python/bin directory. In that script, you write the
-'perf trace -s' command-line needed for running your script:
+'perf script -s' command-line needed for running your script:
 
 ----
 # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
 
 #!/bin/bash
 # description: system-wide syscall counts
-perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
 ----
 
 Note that the location of the Python script given in the shell script
@@ -390,17 +390,17 @@ total 32
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
 drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
--rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py
-drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util
 -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
 ----
 
 Once you've done that (don't forget to do a new 'make install',
-otherwise your script won't show up at run-time), 'perf trace -l'
+otherwise your script won't show up at run-time), 'perf script -l'
 should show a new entry for your script:
 
 ----
-root@tropicana:~# perf trace -l
+root@tropicana:~# perf script -l
 List of available trace scripts:
  workqueue-stats workqueue stats (ins/exe/create/destroy)
  wakeup-latency system-wide min/max/avg wakeup latency
@@ -409,19 +409,19 @@ List of available trace scripts:
  syscall-counts system-wide syscall counts
 ----
 
-You can now perform the record step via 'perf trace record':
+You can now perform the record step via 'perf script record':
 
- # perf trace record syscall-counts
+ # perf script record syscall-counts
 
-and display the output using 'perf trace report':
+and display the output using 'perf script report':
 
- # perf trace report syscall-counts
+ # perf script report syscall-counts
 
 STARTER SCRIPTS
 ---------------
 
 You can quickly get started writing a script for a particular set of
-trace data by generating a skeleton script using 'perf trace -g
+trace data by generating a skeleton script using 'perf script -g
 python' in the same directory as an existing perf.data trace file.
 That will generate a starter script containing a handler for each of
 the event types in the trace file; it simply prints every available
@@ -430,13 +430,13 @@ field for each event in the trace file.
 You can also look at the existing scripts in
 ~/libexec/perf-core/scripts/python for typical examples showing how to
 do basic things like aggregate event data, print results, etc. Also,
-the check-perf-trace.py script, while not interesting for its results,
+the check-perf-script.py script, while not interesting for its results,
 attempts to exercise all of the main scripting features.
 
 EVENT HANDLERS
 --------------
 
-When perf trace is invoked using a trace script, a user-defined
+When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace. If there's
 no handler function defined for a given event type, the event is
 ignored (or passed to a 'trace_handled' function, see below) and the
@@ -510,7 +510,7 @@ write a useful trace script. The sections below cover the rest.
 SCRIPT LAYOUT
 -------------
 
-Every perf trace Python script should start by setting up a Python
+Every perf script Python script should start by setting up a Python
 module search path and 'import'ing a few support modules (see module
 descriptions below):
 
@@ -519,7 +519,7 @@ descriptions below):
  import sys
 
  sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+	'/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -559,15 +559,15 @@ def trace_unhandled(event_name, context, common_cpu, common_secs,
 ----
 
 The remaining sections provide descriptions of each of the available
-built-in perf trace Python modules and their associated functions.
+built-in perf script Python modules and their associated functions.
 
 AVAILABLE MODULES AND FUNCTIONS
 -------------------------------
 
 The following sections describe the functions and variables available
-via the various perf trace Python modules. To use the functions and
+via the various perf script Python modules. To use the functions and
 variables from the given module, add the corresponding 'from XXXX
-import' line to your perf trace script.
+import' line to your perf script script.
 
 Core.py Module
 ~~~~~~~~~~~~~~
@@ -610,7 +610,7 @@ argument.
 Util.py Module
 ~~~~~~~~~~~~~~
 
-Various utility functions for use with perf trace:
+Various utility functions for use with perf script:
 
  nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
  nsecs_secs(nsecs) - returns whole secs portion given nsecs
@@ -620,4 +620,4 @@ Various utility functions for use with perf script:
 
 SEE ALSO
 --------
-linkperf:perf-trace[1]
+linkperf:perf-script[1]
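
For reference, a minimal sketch of the script layout the doc above describes: a counting handler for raw_syscalls:sys_enter plus a trace_end() that prints totals. This is an illustration, not part of the patch; the handler signature is assumed to match the generated skeleton quoted in the hunks above.

----
# sketch.py -- hypothetical illustration, not shipped with this patch
import os
import sys

sys.path.append(os.environ['PERF_EXEC_PATH'] + \
	'/scripts/python/perf-script-Util/lib/Perf/Trace')

from perf_trace_context import *
from Core import *

syscalls = {}	# syscall id -> number of sys_enter events seen

def raw_syscalls__sys_enter(event_name, context, common_cpu, common_secs,
		common_nsecs, common_pid, common_comm, id, args):
	syscalls[id] = syscalls.get(id, 0) + 1

def trace_end():
	# print the per-syscall totals, most frequent first
	for id, count in sorted(syscalls.items(),
			key = lambda kv: kv[1], reverse = True):
		print "%-10d %10d" % (id, count)
----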
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-script.txt
index 26aff6bf9e50..29ad94293cd2 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -1,71 +1,71 @@
-perf-trace(1)
+perf-script(1)
 =============
 
 NAME
 ----
-perf-trace - Read perf.data (created by perf record) and display trace output
+perf-script - Read perf.data (created by perf record) and display trace output
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [<options>]
-'perf trace' [<options>] record <script> [<record-options>] <command>
-'perf trace' [<options>] report <script> [script-args]
-'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command>
-'perf trace' [<options>] <top-script> [script-args]
+'perf script' [<options>]
+'perf script' [<options>] record <script> [<record-options>] <command>
+'perf script' [<options>] report <script> [script-args]
+'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
+'perf script' [<options>] <top-script> [script-args]
 
 DESCRIPTION
 -----------
 This command reads the input file and displays the trace recorded.
 
-There are several variants of perf trace:
+There are several variants of perf script:
 
- 'perf trace' to see a detailed trace of the workload that was
+ 'perf script' to see a detailed trace of the workload that was
  recorded.
 
  You can also run a set of pre-canned scripts that aggregate and
  summarize the raw trace data in various ways (the list of scripts is
- available via 'perf trace -l'). The following variants allow you to
+ available via 'perf script -l'). The following variants allow you to
  record and run those scripts:
 
- 'perf trace record <script> <command>' to record the events required
- for 'perf trace report'. <script> is the name displayed in the
- output of 'perf trace --list' i.e. the actual script name minus any
+ 'perf script record <script> <command>' to record the events required
+ for 'perf script report'. <script> is the name displayed in the
+ output of 'perf script --list' i.e. the actual script name minus any
  language extension. If <command> is not specified, the events are
  recorded using the -a (system-wide) 'perf record' option.
 
- 'perf trace report <script> [args]' to run and display the results
+ 'perf script report <script> [args]' to run and display the results
  of <script>. <script> is the name displayed in the output of 'perf
  trace --list' i.e. the actual script name minus any language
- extension. The perf.data output from a previous run of 'perf trace
+ extension. The perf.data output from a previous run of 'perf script
  record <script>' is used and should be present for this command to
  succeed. [args] refers to the (mainly optional) args expected by
  the script.
 
- 'perf trace <script> <required-script-args> <command>' to both
+ 'perf script <script> <required-script-args> <command>' to both
  record the events required for <script> and to run the <script>
  using 'live-mode' i.e. without writing anything to disk. <script>
- is the name displayed in the output of 'perf trace --list' i.e. the
+ is the name displayed in the output of 'perf script --list' i.e. the
  actual script name minus any language extension. If <command> is
  not specified, the events are recorded using the -a (system-wide)
  'perf record' option. If <script> has any required args, they
  should be specified before <command>. This mode doesn't allow for
  optional script args to be specified; if optional script args are
- desired, they can be specified using separate 'perf trace record'
- and 'perf trace report' commands, with the stdout of the record step
+ desired, they can be specified using separate 'perf script record'
+ and 'perf script report' commands, with the stdout of the record step
  piped to the stdin of the report script, using the '-o -' and '-i -'
  options of the corresponding commands.
 
- 'perf trace <top-script>' to both record the events required for
+ 'perf script <top-script>' to both record the events required for
  <top-script> and to run the <top-script> using 'live-mode'
  i.e. without writing anything to disk. <top-script> is the name
- displayed in the output of 'perf trace --list' i.e. the actual
+ displayed in the output of 'perf script --list' i.e. the actual
  script name minus any language extension; a <top-script> is defined
  as any script name ending with the string 'top'.
 
- [<record-options>] can be passed to the record steps of 'perf trace
+ [<record-options>] can be passed to the record steps of 'perf script
  record' and 'live-mode' variants; this isn't possible however for
- <top-script> 'live-mode' or 'perf trace report' variants.
+ <top-script> 'live-mode' or 'perf script report' variants.
 
  See the 'SEE ALSO' section for links to language-specific
  information on how to write and run your own trace scripts.
@@ -76,7 +76,7 @@ OPTIONS
 	Any command you can specify in a shell.
 
 -D::
---dump-raw-trace=::
+--dump-raw-script=::
 	Display verbose dump of the trace data.
 
 -L::
@@ -95,7 +95,7 @@ OPTIONS
 
 -g::
 --gen-script=::
-	Generate perf-trace.[ext] starter script for given language,
+	Generate perf-script.[ext] starter script for given language,
 	using current perf.data.
 
 -a::
@@ -104,8 +104,15 @@ OPTIONS
 	normally don't - this option allows the latter to be run in
 	system-wide mode.
 
+-i::
+--input=::
+	Input file name.
+
+-d::
+--debug-mode::
+	Do various checks like samples ordering and lost events.
 
 SEE ALSO
 --------
-linkperf:perf-record[1], linkperf:perf-trace-perl[1],
-linkperf:perf-trace-python[1]
+linkperf:perf-record[1], linkperf:perf-script-perl[1],
+linkperf:perf-script-python[1]
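
The live-mode piping mentioned in the DESCRIPTION above ('-o -' from the record step into '-i -' of the report step) can itself be scripted. A hypothetical sketch follows; the script name and the exact placement of the options are placeholders, not taken from the patch.

----
# pipeline.py -- hypothetical illustration of the record | report pipe
import subprocess

# record step writes the trace data to stdout ('-o -') ...
record = subprocess.Popen(
	['perf', 'script', 'record', 'syscall-counts', '-o', '-'],
	stdout = subprocess.PIPE)

# ... and the report step reads it back from stdin ('-i -')
report = subprocess.Popen(
	['perf', 'script', 'report', 'syscall-counts', '-i', '-'],
	stdin = record.stdout)

record.stdout.close()	# give the report step the only read end
report.wait()
----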
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4b3a2d46b437..b6da7affbbee 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
 SYNOPSIS
 --------
 [verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
 
 DESCRIPTION
 -----------
@@ -35,24 +35,54 @@ OPTIONS
 	child tasks do not inherit counters
 -p::
 --pid=<pid>::
-	stat events on existing pid
+	stat events on existing process id
+
+-t::
+--tid=<tid>::
+	stat events on existing thread id
+
 
 -a::
-	system-wide collection
+--all-cpus::
+	system-wide collection from all CPUs
 
 -c::
-	scale counter values
+--scale::
+	scale/normalize counter values
+
+-r::
+--repeat=<n>::
+	repeat command and print average + stddev (max: 100)
 
 -B::
+--big-num::
 	print large numbers with thousands' separators according to locale
 
 -C::
 --cpu=::
-Count only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode, this option is ignored. The -a option is still necessary
 to activate system-wide monitoring. Default is to count on all CPUs.
 
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
+This option is only valid in system-wide mode.
+
+-n::
+--null::
+	null run - don't start any counters
+
+-v::
+--verbose::
+	be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
+
 EXAMPLES
 --------
 
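
Since the new -x/--field-separator option emits machine-readable rows, here is a hypothetical sketch of post-processing them. The column layout is not specified in the hunk above, so the sketch assumes nothing beyond "columns separated by SEP" and simply echoes the parsed fields.

----
# csv_stat.py -- hypothetical illustration, not shipped with this patch
import csv
import subprocess

SEP = ','	# must be a single character for csv.reader
cmd = ['perf', 'stat', '-x', SEP, 'sleep', '1']

# perf stat prints its counter results on stderr
err = subprocess.Popen(cmd, stderr = subprocess.PIPE).communicate()[1]

for row in csv.reader(err.splitlines(), delimiter = SEP):
	if row:		# skip blank lines
		print row
----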
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index 1c4b5f5b7f71..2c3b462f64b0 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command does assorted sanity tests, initially thru linked routines but
+This command does assorted sanity tests, initially through linked routines but
 also will look for a directory with more tests in the form of scripts.
 
 OPTIONS
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index 4b1788355eca..d7b79e2ba2ad 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -38,6 +38,8 @@ OPTIONS
 --process::
 	Select the processes to display, by name or PID
 
+--symfs=<directory>::
+	Look for files with symbols relative to this directory.
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 1f9687663f2a..f6eb1cdafb77 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command generates and displays a performance counter profile in realtime.
+This command generates and displays a performance counter profile in real time.
 
 
 OPTIONS
@@ -27,8 +27,8 @@ OPTIONS
 
 -C <cpu-list>::
 --cpu=<cpu>::
-Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 Default is to monitor all CPUS.
 
 -d <seconds>::
@@ -50,6 +50,10 @@ Default is to monitor all CPUS.
 --count-filter=<count>::
 	Only display functions with more events than this.
 
+-g::
+--group::
+	Put the counters into a counter group.
+
 -F <freq>::
 --freq=<freq>::
 	Profile at this frequency.
@@ -68,7 +72,11 @@ Default is to monitor all CPUS.
 
 -p <pid>::
 --pid=<pid>::
-	Profile events on existing pid.
+	Profile events on existing Process ID.
+
+-t <tid>::
+--tid=<tid>::
+	Profile events on existing thread ID.
 
 -r <priority>::
 --realtime=<priority>::
@@ -78,6 +86,18 @@ Default is to monitor all CPUS.
 --sym-annotate=<symbol>::
 	Annotate this symbol.
 
+-K::
+--hide_kernel_symbols::
+	Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+	Hide user symbols.
+
+-D::
+--dump-symtab::
+	Dump the symbol table used for profiling.
+
 -v::
 --verbose::
 	Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 8c7fc0c8f0b8..c12659d8cb26 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -7,6 +7,7 @@ include/linux/stringify.h
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
+arch/*/lib/memcpy*.S
 include/linux/poison.h
 include/linux/magic.h
 include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d1db0f676a4b..1b9b13ee2a72 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -185,7 +185,10 @@ ifeq ($(ARCH),i386)
         ARCH := x86
 endif
 ifeq ($(ARCH),x86_64)
+        RAW_ARCH := x86_64
         ARCH := x86
+        ARCH_CFLAGS := -DARCH_X86_64
+        ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
 endif
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h
 LIB_H += util/include/linux/rbtree.h
 LIB_H += util/include/linux/string.h
 LIB_H += util/include/linux/types.h
+LIB_H += util/include/linux/linkage.h
 LIB_H += util/include/asm/asm-offsets.h
 LIB_H += util/include/asm/bug.h
 LIB_H += util/include/asm/byteorder.h
@@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h
 LIB_H += util/include/asm/system.h
 LIB_H += util/include/asm/uaccess.h
 LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
 LIB_H += perf.h
 LIB_H += util/cache.h
 LIB_H += util/callchain.h
@@ -390,6 +396,7 @@ LIB_H += util/build-id.h
 LIB_H += util/debug.h
 LIB_H += util/debugfs.h
 LIB_H += util/event.h
+LIB_H += util/evsel.h
 LIB_H += util/exec_cmd.h
 LIB_H += util/types.h
 LIB_H += util/levenshtein.h
@@ -398,6 +405,7 @@ LIB_H += util/parse-options.h
 LIB_H += util/parse-events.h
 LIB_H += util/quote.h
 LIB_H += util/util.h
+LIB_H += util/xyarray.h
 LIB_H += util/header.h
 LIB_H += util/help.h
 LIB_H += util/session.h
@@ -417,6 +425,7 @@ LIB_H += util/probe-finder.h
 LIB_H += util/probe-event.h
 LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
+LIB_H += $(ARCH_INCLUDE)
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -426,6 +435,7 @@ LIB_OBJS += $(OUTPUT)util/ctype.o
 LIB_OBJS += $(OUTPUT)util/debugfs.o
 LIB_OBJS += $(OUTPUT)util/environment.o
 LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evsel.o
 LIB_OBJS += $(OUTPUT)util/exec_cmd.o
 LIB_OBJS += $(OUTPUT)util/help.o
 LIB_OBJS += $(OUTPUT)util/levenshtein.o
@@ -463,6 +473,7 @@ LIB_OBJS += $(OUTPUT)util/sort.o
 LIB_OBJS += $(OUTPUT)util/hist.o
 LIB_OBJS += $(OUTPUT)util/probe-event.o
 LIB_OBJS += $(OUTPUT)util/util.o
+LIB_OBJS += $(OUTPUT)util/xyarray.o
 LIB_OBJS += $(OUTPUT)util/cpumap.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
@@ -472,6 +483,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
 # Benchmark modules
 BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
 BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -485,7 +499,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-report.o
 BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
 BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
 BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
 BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
@@ -507,7 +521,7 @@ PERFLIBS = $(LIB_FILE)
 -include config.mak
 
 ifndef NO_DWARF
-FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
+FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
 ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
 	msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
 	NO_DWARF := 1
@@ -554,7 +568,7 @@ ifndef NO_DWARF
 ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
 	msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
 else
-	BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT
+	BASIC_CFLAGS += -DDWARF_SUPPORT
 	EXTLIBS += -lelf -ldw
 	LIB_OBJS += $(OUTPUT)util/probe-finder.o
 endif # PERF_HAVE_DWARF_REGS
@@ -891,13 +905,14 @@ prefix_SQ = $(subst ','\'',$(prefix))
 SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
 
-LIBS = $(PERFLIBS) $(EXTLIBS)
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
 
 BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
 	$(COMPAT_CFLAGS)
 LIB_OBJS += $(COMPAT_OBJS)
 
 ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_CFLAGS += $(ARCH_CFLAGS)
 ALL_LDFLAGS += $(BASIC_LDFLAGS)
 
 export TAR INSTALL DESTDIR SHELL_PATH
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644
index 000000000000..a72e36cb5394
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) \
+	extern void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644
index 000000000000..d588b87696fc
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -0,0 +1,4 @@
+
+MEMCPY_FN(__memcpy,
+	"x86-64-unrolled",
+	"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 000000000000..a57b66e853c2
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,2 @@
+
+#include "../../../arch/x86/lib/memcpy_64.S"
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 38dae7465142..db82021f4b91 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -12,6 +12,7 @@
 #include "../util/parse-options.h"
 #include "../util/header.h"
 #include "bench.h"
+#include "mem-memcpy-arch.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -23,8 +24,10 @@
 
 static const char *length_str = "1MB";
 static const char *routine = "default";
-static bool use_clock = false;
+static bool use_clock;
 static int clock_fd;
+static bool only_prefault;
+static bool no_prefault;
 
 static const struct option options[] = {
 	OPT_STRING('l', "length", &length_str, "1MB",
@@ -34,19 +37,33 @@ static const struct option options[] = {
 		   "Specify routine to copy"),
 	OPT_BOOLEAN('c', "clock", &use_clock,
 		    "Use CPU clock for measuring"),
+	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+		    "Show only the result with page faults before memcpy()"),
+	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+		    "Show only the result without page faults before memcpy()"),
 	OPT_END()
 };
 
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+
 struct routine {
 	const char *name;
 	const char *desc;
-	void * (*fn)(void *dst, const void *src, size_t len);
+	memcpy_t fn;
 };
 
 struct routine routines[] = {
 	{ "default",
 	  "Default memcpy() provided by glibc",
 	  memcpy },
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memcpy-x86-64-asm-def.h"
+#undef MEMCPY_FN
+
+#endif
+
 	{ NULL,
 	  NULL,
 	  NULL }
@@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts)
 		(double)ts->tv_usec / (double)1000000;
 }
 
+static void alloc_mem(void **dst, void **src, size_t length)
+{
+	*dst = zalloc(length);
+	if (!dst)
+		die("memory allocation failed - maybe length is too large?\n");
+
+	*src = zalloc(length);
+	if (!src)
+		die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
+{
+	u64 clock_start = 0ULL, clock_end = 0ULL;
+	void *src = NULL, *dst = NULL;
+
+	alloc_mem(&src, &dst, len);
+
+	if (prefault)
+		fn(dst, src, len);
+
+	clock_start = get_clock();
+	fn(dst, src, len);
+	clock_end = get_clock();
+
+	free(src);
+	free(dst);
+	return clock_end - clock_start;
+}
+
+static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	void *src = NULL, *dst = NULL;
+
+	alloc_mem(&src, &dst, len);
+
+	if (prefault)
+		fn(dst, src, len);
+
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	fn(dst, src, len);
+	BUG_ON(gettimeofday(&tv_end, NULL));
+
+	timersub(&tv_end, &tv_start, &tv_diff);
+
+	free(src);
+	free(dst);
+	return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {					\
+		if (x < K)					\
+			printf(" %14lf B/Sec", x);		\
+		else if (x < K * K)				\
+			printf(" %14lfd KB/Sec", x / K);	\
+		else if (x < K * K * K)				\
+			printf(" %14lf MB/Sec", x / K / K);	\
+		else						\
+			printf(" %14lf GB/Sec", x / K / K / K); \
+	} while (0)
+
 int bench_mem_memcpy(int argc, const char **argv,
 		     const char *prefix __used)
 {
 	int i;
-	void *dst, *src;
-	size_t length;
-	double bps = 0.0;
-	struct timeval tv_start, tv_end, tv_diff;
-	u64 clock_start, clock_end, clock_diff;
+	size_t len;
+	double result_bps[2];
+	u64 result_clock[2];
 
-	clock_start = clock_end = clock_diff = 0ULL;
 	argc = parse_options(argc, argv, options,
 			     bench_mem_memcpy_usage, 0);
 
-	tv_diff.tv_sec = 0;
-	tv_diff.tv_usec = 0;
-	length = (size_t)perf_atoll((char *)length_str);
+	if (use_clock)
+		init_clock();
+
+	len = (size_t)perf_atoll((char *)length_str);
 
-	if ((s64)length <= 0) {
+	result_clock[0] = result_clock[1] = 0ULL;
+	result_bps[0] = result_bps[1] = 0.0;
+
+	if ((s64)len <= 0) {
 		fprintf(stderr, "Invalid length:%s\n", length_str);
 		return 1;
 	}
 
+	/* same to without specifying either of prefault and no-prefault */
+	if (only_prefault && no_prefault)
+		only_prefault = no_prefault = false;
+
 	for (i = 0; routines[i].name; i++) {
 		if (!strcmp(routines[i].name, routine))
 			break;
@@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv,
 		return 1;
 	}
 
-	dst = zalloc(length);
-	if (!dst)
-		die("memory allocation failed - maybe length is too large?\n");
-
-	src = zalloc(length);
-	if (!src)
-		die("memory allocation failed - maybe length is too large?\n");
-
-	if (bench_format == BENCH_FORMAT_DEFAULT) {
-		printf("# Copying %s Bytes from %p to %p ...\n\n",
-		       length_str, src, dst);
-	}
-
-	if (use_clock) {
-		init_clock();
-		clock_start = get_clock();
-	} else {
-		BUG_ON(gettimeofday(&tv_start, NULL));
-	}
-
-	routines[i].fn(dst, src, length);
+	if (bench_format == BENCH_FORMAT_DEFAULT)
+		printf("# Copying %s Bytes ...\n\n", length_str);
 
-	if (use_clock) {
-		clock_end = get_clock();
-		clock_diff = clock_end - clock_start;
+	if (!only_prefault && !no_prefault) {
+		/* show both of results */
+		if (use_clock) {
+			result_clock[0] =
+				do_memcpy_clock(routines[i].fn, len, false);
+			result_clock[1] =
+				do_memcpy_clock(routines[i].fn, len, true);
+		} else {
+			result_bps[0] =
+				do_memcpy_gettimeofday(routines[i].fn,
						len, false);
+			result_bps[1] =
+				do_memcpy_gettimeofday(routines[i].fn,
+						len, true);
+		}
 	} else {
-		BUG_ON(gettimeofday(&tv_end, NULL));
-		timersub(&tv_end, &tv_start, &tv_diff);
-		bps = (double)((double)length / timeval2double(&tv_diff));
+		if (use_clock) {
+			result_clock[pf] =
+				do_memcpy_clock(routines[i].fn,
+						len, only_prefault);
+		} else {
+			result_bps[pf] =
+				do_memcpy_gettimeofday(routines[i].fn,
+						len, only_prefault);
+		}
 	}
 
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
-		if (use_clock) {
-			printf(" %14lf Clock/Byte\n",
-			       (double)clock_diff / (double)length);
-		} else {
-			if (bps < K)
-				printf(" %14lf B/Sec\n", bps);
-			else if (bps < K * K)
-				printf(" %14lfd KB/Sec\n", bps / 1024);
-			else if (bps < K * K * K)
-				printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
-			else {
-				printf(" %14lf GB/Sec\n",
-				       bps / 1024 / 1024 / 1024);
-			}
+		if (!only_prefault && !no_prefault) {
+			if (use_clock) {
+				printf(" %14lf Clock/Byte\n",
+				       (double)result_clock[0]
+				       / (double)len);
+				printf(" %14lf Clock/Byte (with prefault)\n",
+				       (double)result_clock[1]
+				       / (double)len);
+			} else {
+				print_bps(result_bps[0]);
+				printf("\n");
+				print_bps(result_bps[1]);
+				printf(" (with prefault)\n");
+			}
+		} else {
+			if (use_clock) {
+				printf(" %14lf Clock/Byte",
+				       (double)result_clock[pf]
+				       / (double)len);
+			} else
+				print_bps(result_bps[pf]);
+
+			printf("%s\n", only_prefault ? " (with prefault)" : "");
 		}
 		break;
 	case BENCH_FORMAT_SIMPLE:
-		if (use_clock) {
-			printf("%14lf\n",
-			       (double)clock_diff / (double)length);
-		} else
-			printf("%lf\n", bps);
+		if (!only_prefault && !no_prefault) {
+			if (use_clock) {
+				printf("%lf %lf\n",
+				       (double)result_clock[0] / (double)len,
+				       (double)result_clock[1] / (double)len);
+			} else {
+				printf("%lf %lf\n",
+				       result_bps[0], result_bps[1]);
+			}
+		} else {
+			if (use_clock) {
+				printf("%lf\n", (double)result_clock[pf]
+				       / (double)len);
			} else
+				printf("%lf\n", result_bps[pf]);
+		}
 		break;
 	default:
 		/* reaching this means there's some disaster: */
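
The measurement scheme this patch introduces (one timed copy over never-touched pages, one after a prefault pass) can be mimicked outside C. A rough Python analogue follows, for illustration only; since the slice copy also builds a temporary, its numbers are comparable only relative to each other.

----
# prefault.py -- hypothetical illustration of the measurement scheme
import mmap
import time

def do_memcpy(length, prefault):
	# anonymous mmap gives demand-faulted zero pages, similar to the
	# zalloc()'d buffers in the C benchmark
	dst = mmap.mmap(-1, length)
	src = mmap.mmap(-1, length)
	if prefault:
		dst[:] = src[:]		# touch every page before timing
	start = time.time()
	dst[:] = src[:]			# the timed copy
	elapsed = time.time() - start
	return length / elapsed		# bytes per second

length = 1 << 20			# 1MB, the benchmark's default
print "%14f B/Sec" % do_memcpy(length, False)
print "%14f B/Sec (with prefault)" % do_memcpy(length, True)
----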
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6d5604d8df95..c056cdc06912 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
 	return hist_entry__inc_addr_samples(he, al->addr);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+				struct perf_session *session)
 {
 	struct addr_location al;
-	struct sample_data data;
 
-	if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+	if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
 			   event->header.type);
 		return -1;
@@ -375,6 +375,8 @@ static struct perf_event_ops event_ops = {
 	.mmap = event__process_mmap,
 	.comm = event__process_comm,
 	.fork = event__process_task,
+	.ordered_samples = true,
+	.ordering_requires_timestamps = true,
 };
 
 static int __cmd_annotate(void)
@@ -382,7 +384,7 @@ static int __cmd_annotate(void)
 	int ret;
 	struct perf_session *session;
 
-	session = perf_session__new(input_name, O_RDONLY, force, false);
+	session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
 	if (session == NULL)
 		return -ENOMEM;
 
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 44a47e13bd67..5af32ae9031e 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -36,10 +36,10 @@ static const struct option options[] = {
 
 static int __cmd_buildid_list(void)
 {
-	int err = -1;
 	struct perf_session *session;
 
-	session = perf_session__new(input_name, O_RDONLY, force, false);
+	session = perf_session__new(input_name, O_RDONLY, force, false,
+					&build_id__mark_dso_hit_ops);
 	if (session == NULL)
 		return -1;
 
@@ -49,7 +49,7 @@ static int __cmd_buildid_list(void)
 	perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
 
 	perf_session__delete(session);
-	return err;
+	return 0;
 }
 
 int cmd_buildid_list(int argc, const char **argv, const char *prefix __used)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index fca1d4402910..3153e492dbcc 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self,
30 return -ENOMEM; 30 return -ENOMEM;
31} 31}
32 32
33static int diff__process_sample_event(event_t *event, struct perf_session *session) 33static int diff__process_sample_event(event_t *event,
34 struct sample_data *sample,
35 struct perf_session *session)
34{ 36{
35 struct addr_location al; 37 struct addr_location al;
36 struct sample_data data = { .period = 1, };
37 38
38 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 39 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
39 pr_warning("problem processing %d event, skipping it.\n", 40 pr_warning("problem processing %d event, skipping it.\n",
40 event->header.type); 41 event->header.type);
41 return -1; 42 return -1;
@@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
44 if (al.filtered || al.sym == NULL) 45 if (al.filtered || al.sym == NULL)
45 return 0; 46 return 0;
46 47
47 if (hists__add_entry(&session->hists, &al, data.period)) { 48 if (hists__add_entry(&session->hists, &al, sample->period)) {
48 pr_warning("problem incrementing symbol period, skipping event\n"); 49 pr_warning("problem incrementing symbol period, skipping event\n");
49 return -1; 50 return -1;
50 } 51 }
51 52
52 session->hists.stats.total_period += data.period; 53 session->hists.stats.total_period += sample->period;
53 return 0; 54 return 0;
54} 55}
55 56
@@ -60,6 +61,8 @@ static struct perf_event_ops event_ops = {
60 .exit = event__process_task, 61 .exit = event__process_task,
61 .fork = event__process_task, 62 .fork = event__process_task,
62 .lost = event__process_lost, 63 .lost = event__process_lost,
64 .ordered_samples = true,
65 .ordering_requires_timestamps = true,
63}; 66};
64 67
65static void perf_session__insert_hist_entry_by_name(struct rb_root *root, 68static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -141,8 +144,8 @@ static int __cmd_diff(void)
141 int ret, i; 144 int ret, i;
142 struct perf_session *session[2]; 145 struct perf_session *session[2];
143 146
144 session[0] = perf_session__new(input_old, O_RDONLY, force, false); 147 session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
145 session[1] = perf_session__new(input_new, O_RDONLY, force, false); 148 session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
146 if (session[0] == NULL || session[1] == NULL) 149 if (session[0] == NULL || session[1] == NULL)
147 return -ENOMEM; 150 return -ENOMEM;
148 151
@@ -173,7 +176,7 @@ static const char * const diff_usage[] = {
173static const struct option options[] = { 176static const struct option options[] = {
174 OPT_INCR('v', "verbose", &verbose, 177 OPT_INCR('v', "verbose", &verbose,
175 "be more verbose (show symbol address, etc)"), 178 "be more verbose (show symbol address, etc)"),
176 OPT_BOOLEAN('m', "displacement", &show_displacement, 179 OPT_BOOLEAN('M', "displacement", &show_displacement,
177 "Show position displacement relative to baseline"), 180 "Show position displacement relative to baseline"),
178 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 181 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
179 "dump raw trace in ASCII"), 182 "dump raw trace in ASCII"),
@@ -191,6 +194,8 @@ static const struct option options[] = {
191 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", 194 OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
192 "separator for columns, no spaces will be added between " 195 "separator for columns, no spaces will be added between "
193 "columns '.' is reserved."), 196 "columns '.' is reserved."),
197 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
198 "Look for files with symbols relative to this directory"),
194 OPT_END() 199 OPT_END()
195}; 200};
196 201
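
The other recurring conversion, visible in the builtin-diff.c hunks above: the ->sample callback now receives a sample_data that the session layer has already parsed, so the per-tool event__parse_sample() boilerplate disappears. The shape every tool migrates to is roughly (a sketch, with a hypothetical handler name):

	static int my_process_sample(event_t *event, struct sample_data *sample,
				     struct perf_session *session)
	{
		struct addr_location al;

		/* *sample arrives pre-filled: pid/tid, time, cpu, period, raw data */
		if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
			pr_warning("problem processing %d event, skipping it.\n",
				   event->header.type);
			return -1;
		}

		session->hists.stats.total_period += sample->period;
		return 0;
	}
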
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 8e3e47b064ce..0c78ffa7bf67 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
16static char const *input_name = "-"; 16static char const *input_name = "-";
17static bool inject_build_ids; 17static bool inject_build_ids;
18 18
19static int event__repipe(event_t *event __used, 19static int event__repipe_synth(event_t *event,
20 struct perf_session *session __used) 20 struct perf_session *session __used)
21{ 21{
22 uint32_t size; 22 uint32_t size;
23 void *buf = event; 23 void *buf = event;
@@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used,
36 return 0; 36 return 0;
37} 37}
38 38
39static int event__repipe_mmap(event_t *self, struct perf_session *session) 39static int event__repipe(event_t *event, struct sample_data *sample __used,
40 struct perf_session *session)
41{
42 return event__repipe_synth(event, session);
43}
44
45static int event__repipe_mmap(event_t *self, struct sample_data *sample,
46 struct perf_session *session)
40{ 47{
41 int err; 48 int err;
42 49
43 err = event__process_mmap(self, session); 50 err = event__process_mmap(self, sample, session);
44 event__repipe(self, session); 51 event__repipe(self, sample, session);
45 52
46 return err; 53 return err;
47} 54}
48 55
49static int event__repipe_task(event_t *self, struct perf_session *session) 56static int event__repipe_task(event_t *self, struct sample_data *sample,
57 struct perf_session *session)
50{ 58{
51 int err; 59 int err;
52 60
53 err = event__process_task(self, session); 61 err = event__process_task(self, sample, session);
54 event__repipe(self, session); 62 event__repipe(self, sample, session);
55 63
56 return err; 64 return err;
57} 65}
@@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self,
61{ 69{
62 int err; 70 int err;
63 71
64 event__repipe(self, session); 72 event__repipe_synth(self, session);
65 err = event__process_tracing_data(self, session); 73 err = event__process_tracing_data(self, session);
66 74
67 return err; 75 return err;
@@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
111 return 0; 119 return 0;
112} 120}
113 121
114static int event__inject_buildid(event_t *event, struct perf_session *session) 122static int event__inject_buildid(event_t *event, struct sample_data *sample,
123 struct perf_session *session)
115{ 124{
116 struct addr_location al; 125 struct addr_location al;
117 struct thread *thread; 126 struct thread *thread;
@@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session)
146 } 155 }
147 156
148repipe: 157repipe:
149 event__repipe(event, session); 158 event__repipe(event, sample, session);
150 return 0; 159 return 0;
151} 160}
152 161
@@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = {
160 .read = event__repipe, 169 .read = event__repipe,
161 .throttle = event__repipe, 170 .throttle = event__repipe,
162 .unthrottle = event__repipe, 171 .unthrottle = event__repipe,
163 .attr = event__repipe, 172 .attr = event__repipe_synth,
164 .event_type = event__repipe, 173 .event_type = event__repipe_synth,
165 .tracing_data = event__repipe, 174 .tracing_data = event__repipe_synth,
166 .build_id = event__repipe, 175 .build_id = event__repipe_synth,
167}; 176};
168 177
169extern volatile int session_done; 178extern volatile int session_done;
@@ -187,7 +196,7 @@ static int __cmd_inject(void)
187 inject_ops.tracing_data = event__repipe_tracing_data; 196 inject_ops.tracing_data = event__repipe_tracing_data;
188 } 197 }
189 198
190 session = perf_session__new(input_name, O_RDONLY, false, true); 199 session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
191 if (session == NULL) 200 if (session == NULL)
192 return -ENOMEM; 201 return -ENOMEM;
193 202
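
perf inject keeps two handler flavours because synthesized header records (attr, event_type, tracing_data, build_id) carry no sample_id and must bypass sample parsing, while sample-bearing records get the three-argument wrapper. The shared repipe core is elided by the hunk; a sketch of what it plausibly does, assuming the perf tree's headers and stdout as the output fd:

	static int event__repipe_synth(event_t *event,
				       struct perf_session *session __used)
	{
		uint32_t size = event->header.size;
		void *buf = event;

		/* write the raw record back out; loop to survive short writes */
		while (size) {
			ssize_t ret = write(STDOUT_FILENO, buf, size);

			if (ret < 0)
				return -errno;
			size -= ret;
			buf += ret;
		}
		return 0;
	}
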
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 31f60a2535e0..def7ddc2fd4f 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data,
304 } 304 }
305} 305}
306 306
307static int process_sample_event(event_t *event, struct perf_session *session) 307static int process_sample_event(event_t *event, struct sample_data *sample,
308 struct perf_session *session)
308{ 309{
309 struct sample_data data; 310 struct thread *thread = perf_session__findnew(session, event->ip.pid);
310 struct thread *thread;
311 311
312 memset(&data, 0, sizeof(data));
313 data.time = -1;
314 data.cpu = -1;
315 data.period = 1;
316
317 event__parse_sample(event, session->sample_type, &data);
318
319 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
320 data.pid, data.tid, data.ip, data.period);
321
322 thread = perf_session__findnew(session, event->ip.pid);
323 if (thread == NULL) { 312 if (thread == NULL) {
324 pr_debug("problem processing %d event, skipping it.\n", 313 pr_debug("problem processing %d event, skipping it.\n",
325 event->header.type); 314 event->header.type);
@@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
328 317
329 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 318 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
330 319
331 process_raw_event(event, data.raw_data, data.cpu, 320 process_raw_event(event, sample->raw_data, sample->cpu,
332 data.time, thread); 321 sample->time, thread);
333 322
334 return 0; 323 return 0;
335} 324}
@@ -492,7 +481,8 @@ static void sort_result(void)
492static int __cmd_kmem(void) 481static int __cmd_kmem(void)
493{ 482{
494 int err = -EINVAL; 483 int err = -EINVAL;
495 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); 484 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
485 0, false, &event_ops);
496 if (session == NULL) 486 if (session == NULL)
497 return -ENOMEM; 487 return -ENOMEM;
498 488
@@ -747,6 +737,9 @@ static int __cmd_record(int argc, const char **argv)
747 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 737 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
748 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 738 rec_argv = calloc(rec_argc + 1, sizeof(char *));
749 739
740 if (rec_argv == NULL)
741 return -ENOMEM;
742
750 for (i = 0; i < ARRAY_SIZE(record_args); i++) 743 for (i = 0; i < ARRAY_SIZE(record_args); i++)
751 rec_argv[i] = strdup(record_args[i]); 744 rec_argv[i] = strdup(record_args[i]);
752 745
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 821c1586a22b..b9c6e5432971 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,22 +834,18 @@ static void dump_info(void)
834 die("Unknown type of information\n"); 834 die("Unknown type of information\n");
835} 835}
836 836
837static int process_sample_event(event_t *self, struct perf_session *s) 837static int process_sample_event(event_t *self, struct sample_data *sample,
838 struct perf_session *s)
838{ 839{
839 struct sample_data data; 840 struct thread *thread = perf_session__findnew(s, sample->tid);
840 struct thread *thread;
841 841
842 bzero(&data, sizeof(data));
843 event__parse_sample(self, s->sample_type, &data);
844
845 thread = perf_session__findnew(s, data.tid);
846 if (thread == NULL) { 842 if (thread == NULL) {
847 pr_debug("problem processing %d event, skipping it.\n", 843 pr_debug("problem processing %d event, skipping it.\n",
848 self->header.type); 844 self->header.type);
849 return -1; 845 return -1;
850 } 846 }
851 847
852 process_raw_event(data.raw_data, data.cpu, data.time, thread); 848 process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
853 849
854 return 0; 850 return 0;
855} 851}
@@ -862,7 +858,7 @@ static struct perf_event_ops eops = {
862 858
863static int read_events(void) 859static int read_events(void)
864{ 860{
865 session = perf_session__new(input_name, O_RDONLY, 0, false); 861 session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
866 if (!session) 862 if (!session)
867 die("Initializing perf session failed\n"); 863 die("Initializing perf session failed\n");
868 864
@@ -947,6 +943,9 @@ static int __cmd_record(int argc, const char **argv)
947 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 943 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
948 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 944 rec_argv = calloc(rec_argc + 1, sizeof(char *));
949 945
946 if (rec_argv == NULL)
947 return -ENOMEM;
948
950 for (i = 0; i < ARRAY_SIZE(record_args); i++) 949 for (i = 0; i < ARRAY_SIZE(record_args); i++)
951 rec_argv[i] = strdup(record_args[i]); 950 rec_argv[i] = strdup(record_args[i]);
952 951
@@ -982,9 +981,9 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
982 usage_with_options(report_usage, report_options); 981 usage_with_options(report_usage, report_options);
983 } 982 }
984 __cmd_report(); 983 __cmd_report();
985 } else if (!strcmp(argv[0], "trace")) { 984 } else if (!strcmp(argv[0], "script")) {
986 /* Aliased to 'perf trace' */ 985 /* Aliased to 'perf script' */
987 return cmd_trace(argc, argv, prefix); 986 return cmd_script(argc, argv, prefix);
988 } else if (!strcmp(argv[0], "info")) { 987 } else if (!strcmp(argv[0], "info")) {
989 if (argc) { 988 if (argc) {
990 argc = parse_options(argc, argv, 989 argc = parse_options(argc, argv,
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 2e000c068cc5..add163c9f0e7 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -249,6 +249,11 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
249 !params.show_lines)) 249 !params.show_lines))
250 usage_with_options(probe_usage, options); 250 usage_with_options(probe_usage, options);
251 251
252 /*
253 * Only consider the user's kernel image path if given.
254 */
255 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
256
252 if (params.list_events) { 257 if (params.list_events) {
253 if (params.mod_events) { 258 if (params.mod_events) {
254 pr_err(" Error: Don't use --list with --add/--del.\n"); 259 pr_err(" Error: Don't use --list with --add/--del.\n");
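
Setting try_vmlinux_path only when no vmlinux was named keeps perf probe from scanning default image locations behind the user's back. On the consumer side the flag is expected to gate the search roughly like this (a sketch; the dso__load_vmlinux* names follow util/symbol.c of this era but are not part of the hunk):

	if (symbol_conf.vmlinux_name != NULL)
		err = dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, filter);
	else if (symbol_conf.try_vmlinux_path)
		/* probe the usual suspects, e.g. /boot/vmlinux-`uname -r` */
		err = dso__load_vmlinux_path(dso, map, filter);
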
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 564491fa18b2..7bc049035484 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -18,6 +18,7 @@
18 18
19#include "util/header.h" 19#include "util/header.h"
20#include "util/event.h" 20#include "util/event.h"
21#include "util/evsel.h"
21#include "util/debug.h" 22#include "util/debug.h"
22#include "util/session.h" 23#include "util/session.h"
23#include "util/symbol.h" 24#include "util/symbol.h"
@@ -27,17 +28,18 @@
27#include <sched.h> 28#include <sched.h>
28#include <sys/mman.h> 29#include <sys/mman.h>
29 30
31#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
32
30enum write_mode_t { 33enum write_mode_t {
31 WRITE_FORCE, 34 WRITE_FORCE,
32 WRITE_APPEND 35 WRITE_APPEND
33}; 36};
34 37
35static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
36
37static u64 user_interval = ULLONG_MAX; 38static u64 user_interval = ULLONG_MAX;
38static u64 default_interval = 0; 39static u64 default_interval = 0;
40static u64 sample_type;
39 41
40static int nr_cpus = 0; 42static struct cpu_map *cpus;
41static unsigned int page_size; 43static unsigned int page_size;
42static unsigned int mmap_pages = 128; 44static unsigned int mmap_pages = 128;
43static unsigned int user_freq = UINT_MAX; 45static unsigned int user_freq = UINT_MAX;
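
The FD() macro replaces the fixed fd[MAX_NR_CPUS][MAX_COUNTERS] tables with a per-evsel xyarray sized at runtime. A self-contained stand-in for the accessor, mirroring what tools/perf/util/xyarray.[ch] provides:

	#include <stdlib.h>

	struct xyarray {
		size_t row_size;
		size_t entry_size;
		char contents[];	/* xlen * ylen entries, stored flat */
	};

	static struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
	{
		size_t row_size = ylen * entry_size;
		struct xyarray *xy = calloc(1, sizeof(*xy) + xlen * row_size);

		if (xy != NULL) {
			xy->entry_size = entry_size;
			xy->row_size = row_size;
		}
		return xy;
	}

	static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
	{
		return &xy->contents[x * xy->row_size + y * xy->entry_size];
	}

	/* FD(evsel, cpu, thread) is then just a typed view into evsel->fd */
	#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
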
@@ -48,11 +50,11 @@ static const char *output_name = "perf.data";
48static int group = 0; 50static int group = 0;
49static int realtime_prio = 0; 51static int realtime_prio = 0;
50static bool raw_samples = false; 52static bool raw_samples = false;
53static bool sample_id_all_avail = true;
51static bool system_wide = false; 54static bool system_wide = false;
52static pid_t target_pid = -1; 55static pid_t target_pid = -1;
53static pid_t target_tid = -1; 56static pid_t target_tid = -1;
54static pid_t *all_tids = NULL; 57static struct thread_map *threads;
55static int thread_num = 0;
56static pid_t child_pid = -1; 58static pid_t child_pid = -1;
57static bool no_inherit = false; 59static bool no_inherit = false;
58static enum write_mode_t write_mode = WRITE_FORCE; 60static enum write_mode_t write_mode = WRITE_FORCE;
@@ -60,7 +62,9 @@ static bool call_graph = false;
60static bool inherit_stat = false; 62static bool inherit_stat = false;
61static bool no_samples = false; 63static bool no_samples = false;
62static bool sample_address = false; 64static bool sample_address = false;
65static bool sample_time = false;
63static bool no_buildid = false; 66static bool no_buildid = false;
67static bool no_buildid_cache = false;
64 68
65static long samples = 0; 69static long samples = 0;
66static u64 bytes_written = 0; 70static u64 bytes_written = 0;
@@ -77,7 +81,6 @@ static struct perf_session *session;
77static const char *cpu_list; 81static const char *cpu_list;
78 82
79struct mmap_data { 83struct mmap_data {
80 int counter;
81 void *base; 84 void *base;
82 unsigned int mask; 85 unsigned int mask;
83 unsigned int prev; 86 unsigned int prev;
@@ -128,6 +131,7 @@ static void write_output(void *buf, size_t size)
128} 131}
129 132
130static int process_synthesized_event(event_t *event, 133static int process_synthesized_event(event_t *event,
134 struct sample_data *sample __used,
131 struct perf_session *self __used) 135 struct perf_session *self __used)
132{ 136{
133 write_output(event, event->header.size); 137 write_output(event, event->header.size);
@@ -224,12 +228,12 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
224 return h_attr; 228 return h_attr;
225} 229}
226 230
227static void create_counter(int counter, int cpu) 231static void create_counter(struct perf_evsel *evsel, int cpu)
228{ 232{
229 char *filter = filters[counter]; 233 char *filter = evsel->filter;
230 struct perf_event_attr *attr = attrs + counter; 234 struct perf_event_attr *attr = &evsel->attr;
231 struct perf_header_attr *h_attr; 235 struct perf_header_attr *h_attr;
232 int track = !counter; /* only the first counter needs these */ 236 int track = !evsel->idx; /* only the first counter needs these */
233 int thread_index; 237 int thread_index;
234 int ret; 238 int ret;
235 struct { 239 struct {
@@ -238,6 +242,19 @@ static void create_counter(int counter, int cpu)
238 u64 time_running; 242 u64 time_running;
239 u64 id; 243 u64 id;
240 } read_data; 244 } read_data;
245 /*
246 * Check if parse_single_tracepoint_event has already asked for
247 * PERF_SAMPLE_TIME.
248 *
 249 * XXX this is kludgy but a short-term fix for problems introduced by
250 * eac23d1c that broke 'perf script' by having different sample_types
251 * when using multiple tracepoint events when we use a perf binary
252 * that tries to use sample_id_all on an older kernel.
253 *
254 * We need to move counter creation to perf_session, support
255 * different sample_types, etc.
256 */
257 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
241 258
242 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 259 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
243 PERF_FORMAT_TOTAL_TIME_RUNNING | 260 PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -280,6 +297,10 @@ static void create_counter(int counter, int cpu)
280 if (system_wide) 297 if (system_wide)
281 attr->sample_type |= PERF_SAMPLE_CPU; 298 attr->sample_type |= PERF_SAMPLE_CPU;
282 299
300 if (sample_id_all_avail &&
301 (sample_time || system_wide || !no_inherit || cpu_list))
302 attr->sample_type |= PERF_SAMPLE_TIME;
303
283 if (raw_samples) { 304 if (raw_samples) {
284 attr->sample_type |= PERF_SAMPLE_TIME; 305 attr->sample_type |= PERF_SAMPLE_TIME;
285 attr->sample_type |= PERF_SAMPLE_RAW; 306 attr->sample_type |= PERF_SAMPLE_RAW;
@@ -293,13 +314,14 @@ static void create_counter(int counter, int cpu)
293 attr->disabled = 1; 314 attr->disabled = 1;
294 attr->enable_on_exec = 1; 315 attr->enable_on_exec = 1;
295 } 316 }
317retry_sample_id:
318 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
296 319
297 for (thread_index = 0; thread_index < thread_num; thread_index++) { 320 for (thread_index = 0; thread_index < threads->nr; thread_index++) {
298try_again: 321try_again:
299 fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr, 322 FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0);
300 all_tids[thread_index], cpu, group_fd, 0);
301 323
302 if (fd[nr_cpu][counter][thread_index] < 0) { 324 if (FD(evsel, nr_cpu, thread_index) < 0) {
303 int err = errno; 325 int err = errno;
304 326
305 if (err == EPERM || err == EACCES) 327 if (err == EPERM || err == EACCES)
@@ -309,6 +331,15 @@ try_again:
309 else if (err == ENODEV && cpu_list) { 331 else if (err == ENODEV && cpu_list) {
310 die("No such device - did you specify" 332 die("No such device - did you specify"
311 " an out-of-range profile CPU?\n"); 333 " an out-of-range profile CPU?\n");
334 } else if (err == EINVAL && sample_id_all_avail) {
335 /*
 336 * Old kernel, no attr->sample_id_all field
337 */
338 sample_id_all_avail = false;
339 if (!sample_time && !raw_samples && !time_needed)
340 attr->sample_type &= ~PERF_SAMPLE_TIME;
341
342 goto retry_sample_id;
312 } 343 }
313 344
314 /* 345 /*
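
The EINVAL branch is a probe-and-degrade scheme: optimistically set attr->sample_id_all, and if the running kernel rejects the attribute, clear it and reopen, also dropping PERF_SAMPLE_TIME when nothing actually asked for timestamps. Condensed into one flow (a paraphrase of the hunk above, reusing its variables, not new code):

	attr->sample_id_all = sample_id_all_avail ? 1 : 0;
retry:
	fd = sys_perf_event_open(attr, pid, cpu, group_fd, 0);
	if (fd < 0 && errno == EINVAL && sample_id_all_avail) {
		/* older kernel: perf_event_attr has no sample_id_all bit */
		sample_id_all_avail = false;
		attr->sample_id_all = 0;
		if (!sample_time && !raw_samples && !time_needed)
			attr->sample_type &= ~PERF_SAMPLE_TIME;
		goto retry;
	}
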
@@ -326,8 +357,8 @@ try_again:
326 goto try_again; 357 goto try_again;
327 } 358 }
328 printf("\n"); 359 printf("\n");
329 error("perfcounter syscall returned with %d (%s)\n", 360 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
330 fd[nr_cpu][counter][thread_index], strerror(err)); 361 FD(evsel, nr_cpu, thread_index), strerror(err));
331 362
332#if defined(__i386__) || defined(__x86_64__) 363#if defined(__i386__) || defined(__x86_64__)
333 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) 364 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
@@ -341,7 +372,7 @@ try_again:
341 exit(-1); 372 exit(-1);
342 } 373 }
343 374
344 h_attr = get_header_attr(attr, counter); 375 h_attr = get_header_attr(attr, evsel->idx);
345 if (h_attr == NULL) 376 if (h_attr == NULL)
346 die("nomem\n"); 377 die("nomem\n");
347 378
@@ -352,7 +383,7 @@ try_again:
352 } 383 }
353 } 384 }
354 385
355 if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) { 386 if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
356 perror("Unable to read perf file descriptor"); 387 perror("Unable to read perf file descriptor");
357 exit(-1); 388 exit(-1);
358 } 389 }
@@ -362,43 +393,44 @@ try_again:
362 exit(-1); 393 exit(-1);
363 } 394 }
364 395
365 assert(fd[nr_cpu][counter][thread_index] >= 0); 396 assert(FD(evsel, nr_cpu, thread_index) >= 0);
366 fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK); 397 fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
367 398
368 /* 399 /*
369 * First counter acts as the group leader: 400 * First counter acts as the group leader:
370 */ 401 */
371 if (group && group_fd == -1) 402 if (group && group_fd == -1)
372 group_fd = fd[nr_cpu][counter][thread_index]; 403 group_fd = FD(evsel, nr_cpu, thread_index);
373 404
374 if (counter || thread_index) { 405 if (evsel->idx || thread_index) {
375 ret = ioctl(fd[nr_cpu][counter][thread_index], 406 struct perf_evsel *first;
376 PERF_EVENT_IOC_SET_OUTPUT, 407 first = list_entry(evsel_list.next, struct perf_evsel, node);
377 fd[nr_cpu][0][0]); 408 ret = ioctl(FD(evsel, nr_cpu, thread_index),
409 PERF_EVENT_IOC_SET_OUTPUT,
410 FD(first, nr_cpu, 0));
378 if (ret) { 411 if (ret) {
379 error("failed to set output: %d (%s)\n", errno, 412 error("failed to set output: %d (%s)\n", errno,
380 strerror(errno)); 413 strerror(errno));
381 exit(-1); 414 exit(-1);
382 } 415 }
383 } else { 416 } else {
384 mmap_array[nr_cpu].counter = counter;
385 mmap_array[nr_cpu].prev = 0; 417 mmap_array[nr_cpu].prev = 0;
386 mmap_array[nr_cpu].mask = mmap_pages*page_size - 1; 418 mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
387 mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size, 419 mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
388 PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0); 420 PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
389 if (mmap_array[nr_cpu].base == MAP_FAILED) { 421 if (mmap_array[nr_cpu].base == MAP_FAILED) {
390 error("failed to mmap with %d (%s)\n", errno, strerror(errno)); 422 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
391 exit(-1); 423 exit(-1);
392 } 424 }
393 425
394 event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index]; 426 event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
395 event_array[nr_poll].events = POLLIN; 427 event_array[nr_poll].events = POLLIN;
396 nr_poll++; 428 nr_poll++;
397 } 429 }
398 430
399 if (filter != NULL) { 431 if (filter != NULL) {
400 ret = ioctl(fd[nr_cpu][counter][thread_index], 432 ret = ioctl(FD(evsel, nr_cpu, thread_index),
401 PERF_EVENT_IOC_SET_FILTER, filter); 433 PERF_EVENT_IOC_SET_FILTER, filter);
402 if (ret) { 434 if (ret) {
403 error("failed to set filter with %d (%s)\n", errno, 435 error("failed to set filter with %d (%s)\n", errno,
404 strerror(errno)); 436 strerror(errno));
@@ -406,15 +438,19 @@ try_again:
406 } 438 }
407 } 439 }
408 } 440 }
441
442 if (!sample_type)
443 sample_type = attr->sample_type;
409} 444}
410 445
411static void open_counters(int cpu) 446static void open_counters(int cpu)
412{ 447{
413 int counter; 448 struct perf_evsel *pos;
414 449
415 group_fd = -1; 450 group_fd = -1;
416 for (counter = 0; counter < nr_counters; counter++) 451
417 create_counter(counter, cpu); 452 list_for_each_entry(pos, &evsel_list, node)
453 create_counter(pos, cpu);
418 454
419 nr_cpu++; 455 nr_cpu++;
420} 456}
@@ -437,7 +473,8 @@ static void atexit_header(void)
437 if (!pipe_output) { 473 if (!pipe_output) {
438 session->header.data_size += bytes_written; 474 session->header.data_size += bytes_written;
439 475
440 process_buildids(); 476 if (!no_buildid)
477 process_buildids();
441 perf_header__write(&session->header, output, true); 478 perf_header__write(&session->header, output, true);
442 perf_session__delete(session); 479 perf_session__delete(session);
443 symbol__exit(); 480 symbol__exit();
@@ -500,7 +537,7 @@ static void mmap_read_all(void)
500 537
501static int __cmd_record(int argc, const char **argv) 538static int __cmd_record(int argc, const char **argv)
502{ 539{
503 int i, counter; 540 int i;
504 struct stat st; 541 struct stat st;
505 int flags; 542 int flags;
506 int err; 543 int err;
@@ -552,19 +589,22 @@ static int __cmd_record(int argc, const char **argv)
552 } 589 }
553 590
554 session = perf_session__new(output_name, O_WRONLY, 591 session = perf_session__new(output_name, O_WRONLY,
555 write_mode == WRITE_FORCE, false); 592 write_mode == WRITE_FORCE, false, NULL);
556 if (session == NULL) { 593 if (session == NULL) {
557 pr_err("Not enough memory for reading perf file header\n"); 594 pr_err("Not enough memory for reading perf file header\n");
558 return -1; 595 return -1;
559 } 596 }
560 597
598 if (!no_buildid)
599 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
600
561 if (!file_new) { 601 if (!file_new) {
562 err = perf_header__read(session, output); 602 err = perf_header__read(session, output);
563 if (err < 0) 603 if (err < 0)
564 goto out_delete_session; 604 goto out_delete_session;
565 } 605 }
566 606
567 if (have_tracepoints(attrs, nr_counters)) 607 if (have_tracepoints(&evsel_list))
568 perf_header__set_feat(&session->header, HEADER_TRACE_INFO); 608 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
569 609
570 /* 610 /*
@@ -612,7 +652,7 @@ static int __cmd_record(int argc, const char **argv)
612 } 652 }
613 653
614 if (!system_wide && target_tid == -1 && target_pid == -1) 654 if (!system_wide && target_tid == -1 && target_pid == -1)
615 all_tids[0] = child_pid; 655 threads->map[0] = child_pid;
616 656
617 close(child_ready_pipe[1]); 657 close(child_ready_pipe[1]);
618 close(go_pipe[0]); 658 close(go_pipe[0]);
@@ -626,19 +666,15 @@ static int __cmd_record(int argc, const char **argv)
626 close(child_ready_pipe[0]); 666 close(child_ready_pipe[0]);
627 } 667 }
628 668
629 nr_cpus = read_cpu_map(cpu_list);
630 if (nr_cpus < 1) {
631 perror("failed to collect number of CPUs");
632 return -1;
633 }
634
635 if (!system_wide && no_inherit && !cpu_list) { 669 if (!system_wide && no_inherit && !cpu_list) {
636 open_counters(-1); 670 open_counters(-1);
637 } else { 671 } else {
638 for (i = 0; i < nr_cpus; i++) 672 for (i = 0; i < cpus->nr; i++)
639 open_counters(cpumap[i]); 673 open_counters(cpus->map[i]);
640 } 674 }
641 675
676 perf_session__set_sample_type(session, sample_type);
677
642 if (pipe_output) { 678 if (pipe_output) {
643 err = perf_header__write_pipe(output); 679 err = perf_header__write_pipe(output);
644 if (err < 0) 680 if (err < 0)
@@ -651,6 +687,8 @@ static int __cmd_record(int argc, const char **argv)
651 687
652 post_processing_offset = lseek(output, 0, SEEK_CUR); 688 post_processing_offset = lseek(output, 0, SEEK_CUR);
653 689
690 perf_session__set_sample_id_all(session, sample_id_all_avail);
691
654 if (pipe_output) { 692 if (pipe_output) {
655 err = event__synthesize_attrs(&session->header, 693 err = event__synthesize_attrs(&session->header,
656 process_synthesized_event, 694 process_synthesized_event,
@@ -667,7 +705,7 @@ static int __cmd_record(int argc, const char **argv)
667 return err; 705 return err;
668 } 706 }
669 707
670 if (have_tracepoints(attrs, nr_counters)) { 708 if (have_tracepoints(&evsel_list)) {
671 /* 709 /*
672 * FIXME err <= 0 here actually means that 710 * FIXME err <= 0 here actually means that
 673 * there were no tracepoints so it's not really 711 * there were no tracepoints so it's not really
@@ -676,8 +714,7 @@ static int __cmd_record(int argc, const char **argv)
676 * return this more properly and also 714 * return this more properly and also
677 * propagate errors that now are calling die() 715 * propagate errors that now are calling die()
678 */ 716 */
679 err = event__synthesize_tracing_data(output, attrs, 717 err = event__synthesize_tracing_data(output, &evsel_list,
680 nr_counters,
681 process_synthesized_event, 718 process_synthesized_event,
682 session); 719 session);
683 if (err <= 0) { 720 if (err <= 0) {
@@ -751,13 +788,13 @@ static int __cmd_record(int argc, const char **argv)
751 788
752 if (done) { 789 if (done) {
753 for (i = 0; i < nr_cpu; i++) { 790 for (i = 0; i < nr_cpu; i++) {
754 for (counter = 0; 791 struct perf_evsel *pos;
755 counter < nr_counters; 792
756 counter++) { 793 list_for_each_entry(pos, &evsel_list, node) {
757 for (thread = 0; 794 for (thread = 0;
758 thread < thread_num; 795 thread < threads->nr;
759 thread++) 796 thread++)
760 ioctl(fd[i][counter][thread], 797 ioctl(FD(pos, i, thread),
761 PERF_EVENT_IOC_DISABLE); 798 PERF_EVENT_IOC_DISABLE);
762 } 799 }
763 } 800 }
@@ -831,16 +868,20 @@ const struct option record_options[] = {
831 "per thread counts"), 868 "per thread counts"),
832 OPT_BOOLEAN('d', "data", &sample_address, 869 OPT_BOOLEAN('d', "data", &sample_address,
833 "Sample addresses"), 870 "Sample addresses"),
871 OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
834 OPT_BOOLEAN('n', "no-samples", &no_samples, 872 OPT_BOOLEAN('n', "no-samples", &no_samples,
835 "don't sample"), 873 "don't sample"),
836 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid, 874 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
837 "do not update the buildid cache"), 875 "do not update the buildid cache"),
876 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
877 "do not collect buildids in perf.data"),
838 OPT_END() 878 OPT_END()
839}; 879};
840 880
841int cmd_record(int argc, const char **argv, const char *prefix __used) 881int cmd_record(int argc, const char **argv, const char *prefix __used)
842{ 882{
843 int i, j, err = -ENOMEM; 883 int err = -ENOMEM;
884 struct perf_evsel *pos;
844 885
845 argc = parse_options(argc, argv, record_options, record_usage, 886 argc = parse_options(argc, argv, record_options, record_usage,
846 PARSE_OPT_STOP_AT_NON_OPTION); 887 PARSE_OPT_STOP_AT_NON_OPTION);
@@ -859,41 +900,36 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
859 } 900 }
860 901
861 symbol__init(); 902 symbol__init();
862 if (no_buildid) 903
904 if (no_buildid_cache || no_buildid)
863 disable_buildid_cache(); 905 disable_buildid_cache();
864 906
865 if (!nr_counters) { 907 if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) {
866 nr_counters = 1; 908 pr_err("Not enough memory for event selector list\n");
867 attrs[0].type = PERF_TYPE_HARDWARE; 909 goto out_symbol_exit;
868 attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
869 } 910 }
870 911
871 if (target_pid != -1) { 912 if (target_pid != -1)
872 target_tid = target_pid; 913 target_tid = target_pid;
873 thread_num = find_all_tid(target_pid, &all_tids);
874 if (thread_num <= 0) {
875 fprintf(stderr, "Can't find all threads of pid %d\n",
876 target_pid);
877 usage_with_options(record_usage, record_options);
878 }
879 } else {
880 all_tids=malloc(sizeof(pid_t));
881 if (!all_tids)
882 goto out_symbol_exit;
883 914
884 all_tids[0] = target_tid; 915 threads = thread_map__new(target_pid, target_tid);
885 thread_num = 1; 916 if (threads == NULL) {
917 pr_err("Problems finding threads of monitor\n");
918 usage_with_options(record_usage, record_options);
886 } 919 }
887 920
888 for (i = 0; i < MAX_NR_CPUS; i++) { 921 cpus = cpu_map__new(cpu_list);
889 for (j = 0; j < MAX_COUNTERS; j++) { 922 if (cpus == NULL) {
890 fd[i][j] = malloc(sizeof(int)*thread_num); 923 perror("failed to parse CPUs map");
891 if (!fd[i][j]) 924 return -1;
892 goto out_free_fd;
893 }
894 } 925 }
895 event_array = malloc( 926
896 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); 927 list_for_each_entry(pos, &evsel_list, node) {
928 if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
929 goto out_free_fd;
930 }
931 event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS *
932 MAX_COUNTERS * threads->nr));
897 if (!event_array) 933 if (!event_array)
898 goto out_free_fd; 934 goto out_free_fd;
899 935
@@ -920,12 +956,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
920out_free_event_array: 956out_free_event_array:
921 free(event_array); 957 free(event_array);
922out_free_fd: 958out_free_fd:
923 for (i = 0; i < MAX_NR_CPUS; i++) { 959 thread_map__delete(threads);
924 for (j = 0; j < MAX_COUNTERS; j++) 960 threads = NULL;
925 free(fd[i][j]);
926 }
927 free(all_tids);
928 all_tids = NULL;
929out_symbol_exit: 961out_symbol_exit:
930 symbol__exit(); 962 symbol__exit();
931 return err; 963 return err;
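
Target discovery now goes through the shared thread_map/cpu_map helpers instead of the open-coded find_all_tid() loop and the static fd tables. The resulting setup sequence, condensed from the hunks above:

	threads = thread_map__new(target_pid, target_tid);
	if (threads == NULL) {
		pr_err("Problems finding threads of monitor\n");
		usage_with_options(record_usage, record_options);
	}

	cpus = cpu_map__new(cpu_list);	/* a NULL cpu_list selects all online CPUs */
	if (cpus == NULL) {
		perror("failed to parse CPUs map");
		return -1;
	}

	list_for_each_entry(pos, &evsel_list, node)
		if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
			goto out_free_fd;
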
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5de405d45230..75183a4518e6 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session,
150 return 0; 150 return 0;
151} 151}
152 152
153static int process_sample_event(event_t *event, struct perf_session *session) 153static int process_sample_event(event_t *event, struct sample_data *sample,
154 struct perf_session *session)
154{ 155{
155 struct sample_data data = { .period = 1, };
156 struct addr_location al; 156 struct addr_location al;
157 struct perf_event_attr *attr; 157 struct perf_event_attr *attr;
158 158
159 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) { 159 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
160 fprintf(stderr, "problem processing %d event, skipping it.\n", 160 fprintf(stderr, "problem processing %d event, skipping it.\n",
161 event->header.type); 161 event->header.type);
162 return -1; 162 return -1;
@@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
165 if (al.filtered || (hide_unresolved && al.sym == NULL)) 165 if (al.filtered || (hide_unresolved && al.sym == NULL))
166 return 0; 166 return 0;
167 167
168 if (perf_session__add_hist_entry(session, &al, &data)) { 168 if (perf_session__add_hist_entry(session, &al, sample)) {
169 pr_debug("problem incrementing symbol period, skipping event\n"); 169 pr_debug("problem incrementing symbol period, skipping event\n");
170 return -1; 170 return -1;
171 } 171 }
172 172
173 attr = perf_header__find_attr(data.id, &session->header); 173 attr = perf_header__find_attr(sample->id, &session->header);
174 174
175 if (add_event_total(session, &data, attr)) { 175 if (add_event_total(session, sample, attr)) {
176 pr_debug("problem adding event period\n"); 176 pr_debug("problem adding event period\n");
177 return -1; 177 return -1;
178 } 178 }
@@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
180 return 0; 180 return 0;
181} 181}
182 182
183static int process_read_event(event_t *event, struct perf_session *session __used) 183static int process_read_event(event_t *event, struct sample_data *sample __used,
184 struct perf_session *session __used)
184{ 185{
185 struct perf_event_attr *attr; 186 struct perf_event_attr *attr;
186 187
@@ -243,6 +244,8 @@ static struct perf_event_ops event_ops = {
243 .event_type = event__process_event_type, 244 .event_type = event__process_event_type,
244 .tracing_data = event__process_tracing_data, 245 .tracing_data = event__process_tracing_data,
245 .build_id = event__process_build_id, 246 .build_id = event__process_build_id,
247 .ordered_samples = true,
248 .ordering_requires_timestamps = true,
246}; 249};
247 250
248extern volatile int session_done; 251extern volatile int session_done;
@@ -307,7 +310,7 @@ static int __cmd_report(void)
307 310
308 signal(SIGINT, sig_handler); 311 signal(SIGINT, sig_handler);
309 312
310 session = perf_session__new(input_name, O_RDONLY, force, false); 313 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
311 if (session == NULL) 314 if (session == NULL)
312 return -ENOMEM; 315 return -ENOMEM;
313 316
@@ -442,6 +445,8 @@ static const struct option options[] = {
442 "dump raw trace in ASCII"), 445 "dump raw trace in ASCII"),
443 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 446 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
444 "file", "vmlinux pathname"), 447 "file", "vmlinux pathname"),
448 OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
449 "file", "kallsyms pathname"),
445 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 450 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
446 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, 451 OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
447 "load module symbols - WARNING: use only with -k and LIVE kernel"), 452 "load module symbols - WARNING: use only with -k and LIVE kernel"),
@@ -478,6 +483,8 @@ static const struct option options[] = {
478 "columns '.' is reserved."), 483 "columns '.' is reserved."),
479 OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved, 484 OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
480 "Only display entries resolved to a symbol"), 485 "Only display entries resolved to a symbol"),
486 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
487 "Look for files with symbols relative to this directory"),
481 OPT_END() 488 OPT_END()
482}; 489};
483 490
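
ordered_samples alone asks for timestamp-ordered delivery; the new ordering_requires_timestamps flag additionally tells the session layer it may fall back to file order when the data was recorded without PERF_SAMPLE_TIME (e.g. on a kernel lacking sample_id_all). The check in the session core is expected to look roughly like:

	if (ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !session->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, "
			    "falling back to unordered processing\n");
		ops->ordered_samples = false;
	}
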
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 55f3b5dcc731..7a4ebeb8b016 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
1606 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); 1606 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
1607} 1607}
1608 1608
1609static int process_sample_event(event_t *event, struct perf_session *session) 1609static int process_sample_event(event_t *event, struct sample_data *sample,
1610 struct perf_session *session)
1610{ 1611{
1611 struct sample_data data;
1612 struct thread *thread; 1612 struct thread *thread;
1613 1613
1614 if (!(session->sample_type & PERF_SAMPLE_RAW)) 1614 if (!(session->sample_type & PERF_SAMPLE_RAW))
1615 return 0; 1615 return 0;
1616 1616
1617 memset(&data, 0, sizeof(data)); 1617 thread = perf_session__findnew(session, sample->pid);
1618 data.time = -1;
1619 data.cpu = -1;
1620 data.period = -1;
1621
1622 event__parse_sample(event, session->sample_type, &data);
1623
1624 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
1625 data.pid, data.tid, data.ip, data.period);
1626
1627 thread = perf_session__findnew(session, data.pid);
1628 if (thread == NULL) { 1618 if (thread == NULL) {
1629 pr_debug("problem processing %d event, skipping it.\n", 1619 pr_debug("problem processing %d event, skipping it.\n",
1630 event->header.type); 1620 event->header.type);
@@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session)
1633 1623
1634 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 1624 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
1635 1625
1636 if (profile_cpu != -1 && profile_cpu != (int)data.cpu) 1626 if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
1637 return 0; 1627 return 0;
1638 1628
1639 process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread); 1629 process_raw_event(event, session, sample->raw_data, sample->cpu,
1630 sample->time, thread);
1640 1631
1641 return 0; 1632 return 0;
1642} 1633}
@@ -1652,7 +1643,8 @@ static struct perf_event_ops event_ops = {
1652static int read_events(void) 1643static int read_events(void)
1653{ 1644{
1654 int err = -EINVAL; 1645 int err = -EINVAL;
1655 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); 1646 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
1647 0, false, &event_ops);
1656 if (session == NULL) 1648 if (session == NULL)
1657 return -ENOMEM; 1649 return -ENOMEM;
1658 1650
@@ -1869,6 +1861,9 @@ static int __cmd_record(int argc, const char **argv)
1869 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1861 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1870 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1862 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1871 1863
 1864 if (rec_argv == NULL)
1865 return -ENOMEM;
1866
1872 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1867 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1873 rec_argv[i] = strdup(record_args[i]); 1868 rec_argv[i] = strdup(record_args[i]);
1874 1869
@@ -1888,10 +1883,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
1888 usage_with_options(sched_usage, sched_options); 1883 usage_with_options(sched_usage, sched_options);
1889 1884
1890 /* 1885 /*
1891 * Aliased to 'perf trace' for now: 1886 * Aliased to 'perf script' for now:
1892 */ 1887 */
1893 if (!strcmp(argv[0], "trace")) 1888 if (!strcmp(argv[0], "script"))
1894 return cmd_trace(argc, argv, prefix); 1889 return cmd_script(argc, argv, prefix);
1895 1890
1896 symbol__init(); 1891 symbol__init();
1897 if (!strncmp(argv[0], "rec", 3)) { 1892 if (!strncmp(argv[0], "rec", 3)) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-script.c
index 86cfe3800e6b..150a606002eb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-script.c
@@ -56,29 +56,18 @@ static void setup_scripting(void)
56 56
57static int cleanup_scripting(void) 57static int cleanup_scripting(void)
58{ 58{
59 pr_debug("\nperf trace script stopped\n"); 59 pr_debug("\nperf script stopped\n");
60 60
61 return scripting_ops->stop_script(); 61 return scripting_ops->stop_script();
62} 62}
63 63
64static char const *input_name = "perf.data"; 64static char const *input_name = "perf.data";
65 65
66static int process_sample_event(event_t *event, struct perf_session *session) 66static int process_sample_event(event_t *event, struct sample_data *sample,
67 struct perf_session *session)
67{ 68{
68 struct sample_data data; 69 struct thread *thread = perf_session__findnew(session, event->ip.pid);
69 struct thread *thread;
70 70
71 memset(&data, 0, sizeof(data));
72 data.time = -1;
73 data.cpu = -1;
74 data.period = 1;
75
76 event__parse_sample(event, session->sample_type, &data);
77
78 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
79 data.pid, data.tid, data.ip, data.period);
80
81 thread = perf_session__findnew(session, event->ip.pid);
82 if (thread == NULL) { 71 if (thread == NULL) {
83 pr_debug("problem processing %d event, skipping it.\n", 72 pr_debug("problem processing %d event, skipping it.\n",
84 event->header.type); 73 event->header.type);
@@ -87,13 +76,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
87 76
88 if (session->sample_type & PERF_SAMPLE_RAW) { 77 if (session->sample_type & PERF_SAMPLE_RAW) {
89 if (debug_mode) { 78 if (debug_mode) {
90 if (data.time < last_timestamp) { 79 if (sample->time < last_timestamp) {
91 pr_err("Samples misordered, previous: %llu " 80 pr_err("Samples misordered, previous: %llu "
92 "this: %llu\n", last_timestamp, 81 "this: %llu\n", last_timestamp,
93 data.time); 82 sample->time);
94 nr_unordered++; 83 nr_unordered++;
95 } 84 }
96 last_timestamp = data.time; 85 last_timestamp = sample->time;
97 return 0; 86 return 0;
98 } 87 }
99 /* 88 /*
@@ -101,21 +90,12 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 101 * field, although it should be the same as this perf 90 * field, although it should be the same as this perf
102 * event pid 91 * event pid
103 */ 92 */
104 scripting_ops->process_event(data.cpu, data.raw_data, 93 scripting_ops->process_event(sample->cpu, sample->raw_data,
105 data.raw_size, 94 sample->raw_size,
106 data.time, thread->comm); 95 sample->time, thread->comm);
107 } 96 }
108 97
109 session->hists.stats.total_period += data.period; 98 session->hists.stats.total_period += sample->period;
110 return 0;
111}
112
113static u64 nr_lost;
114
115static int process_lost_event(event_t *event, struct perf_session *session __used)
116{
117 nr_lost += event->lost.lost;
118
119 return 0; 99 return 0;
120} 100}
121 101
@@ -126,7 +106,7 @@ static struct perf_event_ops event_ops = {
126 .event_type = event__process_event_type, 106 .event_type = event__process_event_type,
127 .tracing_data = event__process_tracing_data, 107 .tracing_data = event__process_tracing_data,
128 .build_id = event__process_build_id, 108 .build_id = event__process_build_id,
129 .lost = process_lost_event, 109 .ordering_requires_timestamps = true,
130 .ordered_samples = true, 110 .ordered_samples = true,
131}; 111};
132 112
@@ -137,7 +117,7 @@ static void sig_handler(int sig __unused)
137 session_done = 1; 117 session_done = 1;
138} 118}
139 119
140static int __cmd_trace(struct perf_session *session) 120static int __cmd_script(struct perf_session *session)
141{ 121{
142 int ret; 122 int ret;
143 123
@@ -145,10 +125,8 @@ static int __cmd_trace(struct perf_session *session)
145 125
146 ret = perf_session__process_events(session, &event_ops); 126 ret = perf_session__process_events(session, &event_ops);
147 127
148 if (debug_mode) { 128 if (debug_mode)
149 pr_err("Misordered timestamps: %llu\n", nr_unordered); 129 pr_err("Misordered timestamps: %llu\n", nr_unordered);
150 pr_err("Lost events: %llu\n", nr_lost);
151 }
152 130
153 return ret; 131 return ret;
154} 132}
@@ -159,7 +137,7 @@ struct script_spec {
159 char spec[0]; 137 char spec[0];
160}; 138};
161 139
162LIST_HEAD(script_specs); 140static LIST_HEAD(script_specs);
163 141
164static struct script_spec *script_spec__new(const char *spec, 142static struct script_spec *script_spec__new(const char *spec,
165 struct scripting_ops *ops) 143 struct scripting_ops *ops)
@@ -247,7 +225,7 @@ static void list_available_languages(void)
247 225
248 fprintf(stderr, "\n"); 226 fprintf(stderr, "\n");
249 fprintf(stderr, "Scripting language extensions (used in " 227 fprintf(stderr, "Scripting language extensions (used in "
250 "perf trace -s [spec:]script.[spec]):\n\n"); 228 "perf script -s [spec:]script.[spec]):\n\n");
251 229
252 list_for_each_entry(s, &script_specs, node) 230 list_for_each_entry(s, &script_specs, node)
253 fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name); 231 fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name);
@@ -301,17 +279,34 @@ static int parse_scriptname(const struct option *opt __used,
301 return 0; 279 return 0;
302} 280}
303 281
304#define for_each_lang(scripts_dir, lang_dirent, lang_next) \ 282/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
283static int is_directory(const char *base_path, const struct dirent *dent)
284{
285 char path[PATH_MAX];
286 struct stat st;
287
288 sprintf(path, "%s/%s", base_path, dent->d_name);
289 if (stat(path, &st))
290 return 0;
291
292 return S_ISDIR(st.st_mode);
293}
294
295#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
305 while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ 296 while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \
306 lang_next) \ 297 lang_next) \
307 if (lang_dirent.d_type == DT_DIR && \ 298 if ((lang_dirent.d_type == DT_DIR || \
299 (lang_dirent.d_type == DT_UNKNOWN && \
300 is_directory(scripts_path, &lang_dirent))) && \
308 (strcmp(lang_dirent.d_name, ".")) && \ 301 (strcmp(lang_dirent.d_name, ".")) && \
309 (strcmp(lang_dirent.d_name, ".."))) 302 (strcmp(lang_dirent.d_name, "..")))
310 303
311#define for_each_script(lang_dir, script_dirent, script_next) \ 304#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
312 while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ 305 while (!readdir_r(lang_dir, &script_dirent, &script_next) && \
313 script_next) \ 306 script_next) \
314 if (script_dirent.d_type != DT_DIR) 307 if (script_dirent.d_type != DT_DIR && \
308 (script_dirent.d_type != DT_UNKNOWN || \
309 !is_directory(lang_path, &script_dirent)))
315 310
316 311
317#define RECORD_SUFFIX "-record" 312#define RECORD_SUFFIX "-record"
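
d_type == DT_UNKNOWN is a legal readdir() answer on filesystems that do not fill in the type, so the macros fall back to an explicit stat() on the joined path. A self-contained demo of the same predicate, listing the subdirectories of a given directory:

	#include <dirent.h>
	#include <limits.h>
	#include <stdio.h>
	#include <sys/stat.h>

	static int is_directory(const char *base, const struct dirent *dent)
	{
		char path[PATH_MAX];
		struct stat st;

		snprintf(path, sizeof(path), "%s/%s", base, dent->d_name);
		if (stat(path, &st))
			return 0;
		return S_ISDIR(st.st_mode);
	}

	int main(int argc, char **argv)
	{
		const char *base = argc > 1 ? argv[1] : ".";
		DIR *dir = opendir(base);
		struct dirent *dent;

		if (dir == NULL)
			return 1;
		/* trust d_type when set, stat() when it is DT_UNKNOWN */
		while ((dent = readdir(dir)) != NULL)
			if (dent->d_type == DT_DIR ||
			    (dent->d_type == DT_UNKNOWN && is_directory(base, dent)))
				printf("%s/\n", dent->d_name);
		closedir(dir);
		return 0;
	}
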
@@ -324,7 +319,7 @@ struct script_desc {
324 char *args; 319 char *args;
325}; 320};
326 321
327LIST_HEAD(script_descs); 322static LIST_HEAD(script_descs);
328 323
329static struct script_desc *script_desc__new(const char *name) 324static struct script_desc *script_desc__new(const char *name)
330{ 325{
@@ -380,10 +375,10 @@ out_delete_desc:
380 return NULL; 375 return NULL;
381} 376}
382 377
383static char *ends_with(char *str, const char *suffix) 378static const char *ends_with(const char *str, const char *suffix)
384{ 379{
385 size_t suffix_len = strlen(suffix); 380 size_t suffix_len = strlen(suffix);
386 char *p = str; 381 const char *p = str;
387 382
388 if (strlen(str) > suffix_len) { 383 if (strlen(str) > suffix_len) {
389 p = str + strlen(str) - suffix_len; 384 p = str + strlen(str) - suffix_len;
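
Returning const char * documents that ends_with() only locates the suffix; the two call sites that really do truncate their own strdup()ed copies now carry an explicit (char *) cast instead of silently discarding the qualifier. The hunk trims the helper's tail; the full function plausibly reads:

	static const char *ends_with(const char *str, const char *suffix)
	{
		size_t suffix_len = strlen(suffix);
		const char *p = str;

		if (strlen(str) > suffix_len) {
			p = str + strlen(str) - suffix_len;
			if (!strncmp(p, suffix, suffix_len))
				return p;
		}
		return NULL;
	}
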
@@ -466,16 +461,16 @@ static int list_available_scripts(const struct option *opt __used,
466 if (!scripts_dir) 461 if (!scripts_dir)
467 return -1; 462 return -1;
468 463
469 for_each_lang(scripts_dir, lang_dirent, lang_next) { 464 for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
470 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, 465 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
471 lang_dirent.d_name); 466 lang_dirent.d_name);
472 lang_dir = opendir(lang_path); 467 lang_dir = opendir(lang_path);
473 if (!lang_dir) 468 if (!lang_dir)
474 continue; 469 continue;
475 470
476 for_each_script(lang_dir, script_dirent, script_next) { 471 for_each_script(lang_path, lang_dir, script_dirent, script_next) {
477 script_root = strdup(script_dirent.d_name); 472 script_root = strdup(script_dirent.d_name);
478 str = ends_with(script_root, REPORT_SUFFIX); 473 str = (char *)ends_with(script_root, REPORT_SUFFIX);
479 if (str) { 474 if (str) {
480 *str = '\0'; 475 *str = '\0';
481 desc = script_desc__findnew(script_root); 476 desc = script_desc__findnew(script_root);
@@ -514,16 +509,16 @@ static char *get_script_path(const char *script_root, const char *suffix)
514 if (!scripts_dir) 509 if (!scripts_dir)
515 return NULL; 510 return NULL;
516 511
517 for_each_lang(scripts_dir, lang_dirent, lang_next) { 512 for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
518 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, 513 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
519 lang_dirent.d_name); 514 lang_dirent.d_name);
520 lang_dir = opendir(lang_path); 515 lang_dir = opendir(lang_path);
521 if (!lang_dir) 516 if (!lang_dir)
522 continue; 517 continue;
523 518
524 for_each_script(lang_dir, script_dirent, script_next) { 519 for_each_script(lang_path, lang_dir, script_dirent, script_next) {
525 __script_root = strdup(script_dirent.d_name); 520 __script_root = strdup(script_dirent.d_name);
526 str = ends_with(__script_root, suffix); 521 str = (char *)ends_with(__script_root, suffix);
527 if (str) { 522 if (str) {
528 *str = '\0'; 523 *str = '\0';
529 if (strcmp(__script_root, script_root)) 524 if (strcmp(__script_root, script_root))
@@ -543,7 +538,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
543 538
544static bool is_top_script(const char *script_path) 539static bool is_top_script(const char *script_path)
545{ 540{
546 return ends_with((char *)script_path, "top") == NULL ? false : true; 541 return ends_with(script_path, "top") == NULL ? false : true;
547} 542}
548 543
549static int has_required_arg(char *script_path) 544static int has_required_arg(char *script_path)
@@ -569,12 +564,12 @@ out:
569 return n_args; 564 return n_args;
570} 565}
571 566
572static const char * const trace_usage[] = { 567static const char * const script_usage[] = {
573 "perf trace [<options>]", 568 "perf script [<options>]",
574 "perf trace [<options>] record <script> [<record-options>] <command>", 569 "perf script [<options>] record <script> [<record-options>] <command>",
575 "perf trace [<options>] report <script> [script-args]", 570 "perf script [<options>] report <script> [script-args]",
576 "perf trace [<options>] <script> [<record-options>] <command>", 571 "perf script [<options>] <script> [<record-options>] <command>",
577 "perf trace [<options>] <top-script> [script-args]", 572 "perf script [<options>] <top-script> [script-args]",
578 NULL 573 NULL
579}; 574};
580 575
@@ -591,7 +586,7 @@ static const struct option options[] = {
591 "script file name (lang:script name, script name, or *)", 586 "script file name (lang:script name, script name, or *)",
592 parse_scriptname), 587 parse_scriptname),
593 OPT_STRING('g', "gen-script", &generate_script_lang, "lang", 588 OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
594 "generate perf-trace.xx script in specified language"), 589 "generate perf-script.xx script in specified language"),
595 OPT_STRING('i', "input", &input_name, "file", 590 OPT_STRING('i', "input", &input_name, "file",
596 "input file name"), 591 "input file name"),
597 OPT_BOOLEAN('d', "debug-mode", &debug_mode, 592 OPT_BOOLEAN('d', "debug-mode", &debug_mode,
@@ -614,7 +609,7 @@ static bool have_cmd(int argc, const char **argv)
614 return argc != 0; 609 return argc != 0;
615} 610}
616 611
617int cmd_trace(int argc, const char **argv, const char *prefix __used) 612int cmd_script(int argc, const char **argv, const char *prefix __used)
618{ 613{
619 char *rec_script_path = NULL; 614 char *rec_script_path = NULL;
620 char *rep_script_path = NULL; 615 char *rep_script_path = NULL;
@@ -626,7 +621,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
626 621
627 setup_scripting(); 622 setup_scripting();
628 623
629 argc = parse_options(argc, argv, options, trace_usage, 624 argc = parse_options(argc, argv, options, script_usage,
630 PARSE_OPT_STOP_AT_NON_OPTION); 625 PARSE_OPT_STOP_AT_NON_OPTION);
631 626
632 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { 627 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
@@ -640,7 +635,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
640 if (!rep_script_path) { 635 if (!rep_script_path) {
641 fprintf(stderr, 636 fprintf(stderr,
642 "Please specify a valid report script" 637 "Please specify a valid report script"
643 "(see 'perf trace -l' for listing)\n"); 638 "(see 'perf script -l' for listing)\n");
644 return -1; 639 return -1;
645 } 640 }
646 } 641 }
@@ -658,8 +653,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
658 653
659 if (!rec_script_path && !rep_script_path) { 654 if (!rec_script_path && !rep_script_path) {
660 fprintf(stderr, " Couldn't find script %s\n\n See perf" 655 fprintf(stderr, " Couldn't find script %s\n\n See perf"
661 " trace -l for available scripts.\n", argv[0]); 656 " script -l for available scripts.\n", argv[0]);
662 usage_with_options(trace_usage, options); 657 usage_with_options(script_usage, options);
663 } 658 }
664 659
665 if (is_top_script(argv[0])) { 660 if (is_top_script(argv[0])) {
@@ -671,9 +666,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
671 rec_args = (argc - 1) - rep_args; 666 rec_args = (argc - 1) - rep_args;
672 if (rec_args < 0) { 667 if (rec_args < 0) {
673 fprintf(stderr, " %s script requires options." 668 fprintf(stderr, " %s script requires options."
674 "\n\n See perf trace -l for available " 669 "\n\n See perf script -l for available "
675 "scripts and options.\n", argv[0]); 670 "scripts and options.\n", argv[0]);
676 usage_with_options(trace_usage, options); 671 usage_with_options(script_usage, options);
677 } 672 }
678 } 673 }
679 674
@@ -772,7 +767,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
772 if (!script_name) 767 if (!script_name)
773 setup_pager(); 768 setup_pager();
774 769
775 session = perf_session__new(input_name, O_RDONLY, 0, false); 770 session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops);
776 if (session == NULL) 771 if (session == NULL)
777 return -ENOMEM; 772 return -ENOMEM;
778 773
@@ -806,7 +801,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
806 return -1; 801 return -1;
807 } 802 }
808 803
809 err = scripting_ops->generate_script("perf-trace"); 804 err = scripting_ops->generate_script("perf-script");
810 goto out; 805 goto out;
811 } 806 }
812 807
@@ -814,10 +809,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
814 err = scripting_ops->start_script(script_name, argc, argv); 809 err = scripting_ops->start_script(script_name, argc, argv);
815 if (err) 810 if (err)
816 goto out; 811 goto out;
817 pr_debug("perf trace started with script %s\n\n", script_name); 812 pr_debug("perf script started with script %s\n\n", script_name);
818 } 813 }
819 814
820 err = __cmd_trace(session); 815 err = __cmd_script(session);
821 816
822 perf_session__delete(session); 817 perf_session__delete(session);
823 cleanup_scripting(); 818 cleanup_scripting();
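
The trace-to-script rename also changes session construction: perf_session__new() now takes the perf_event_ops pointer as its fifth argument, so the handlers travel with the session from creation instead of being supplied later at process time. A minimal sketch of the new call shape, assuming this tree's util/session.h and event types; my_sample, my_ops and open_report_session are placeholder names, not symbols added by this patch:

/*
 * Placeholder sample handler using the (event, parsed sample, session)
 * callback signature this patch converts the builtins to.
 */
static int my_sample(event_t *event __used,
                     struct sample_data *sample __used,
                     struct perf_session *session __used)
{
        return 0;       /* a real tool would consume the sample here */
}

static struct perf_event_ops my_ops = {
        .sample = my_sample,
};

static struct perf_session *open_report_session(const char *input_name)
{
        /* the ops are now bound at creation time (fifth argument) */
        return perf_session__new(input_name, O_RDONLY, 0, false, &my_ops);
}
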
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6b4d44f9502..02b2d8013a61 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,6 +43,7 @@
43#include "util/parse-options.h" 43#include "util/parse-options.h"
44#include "util/parse-events.h" 44#include "util/parse-events.h"
45#include "util/event.h" 45#include "util/event.h"
46#include "util/evsel.h"
46#include "util/debug.h" 47#include "util/debug.h"
47#include "util/header.h" 48#include "util/header.h"
48#include "util/cpumap.h" 49#include "util/cpumap.h"
@@ -52,6 +53,8 @@
52#include <math.h> 53#include <math.h>
53#include <locale.h> 54#include <locale.h>
54 55
56#define DEFAULT_SEPARATOR " "
57
55static struct perf_event_attr default_attrs[] = { 58static struct perf_event_attr default_attrs[] = {
56 59
57 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, 60 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
@@ -69,25 +72,23 @@ static struct perf_event_attr default_attrs[] = {
69}; 72};
70 73
71static bool system_wide = false; 74static bool system_wide = false;
72static int nr_cpus = 0; 75static struct cpu_map *cpus;
73static int run_idx = 0; 76static int run_idx = 0;
74 77
75static int run_count = 1; 78static int run_count = 1;
76static bool no_inherit = false; 79static bool no_inherit = false;
77static bool scale = true; 80static bool scale = true;
81static bool no_aggr = false;
78static pid_t target_pid = -1; 82static pid_t target_pid = -1;
79static pid_t target_tid = -1; 83static pid_t target_tid = -1;
80static pid_t *all_tids = NULL; 84static struct thread_map *threads;
81static int thread_num = 0;
82static pid_t child_pid = -1; 85static pid_t child_pid = -1;
83static bool null_run = false; 86static bool null_run = false;
84static bool big_num = false; 87static bool big_num = true;
88static int big_num_opt = -1;
85static const char *cpu_list; 89static const char *cpu_list;
86 90static const char *csv_sep = NULL;
87 91static bool csv_output = false;
88static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
89
90static int event_scaled[MAX_COUNTERS];
91 92
92static volatile int done = 0; 93static volatile int done = 0;
93 94
@@ -96,6 +97,22 @@ struct stats
96 double n, mean, M2; 97 double n, mean, M2;
97}; 98};
98 99
100struct perf_stat {
101 struct stats res_stats[3];
102};
103
104static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
105{
106 evsel->priv = zalloc(sizeof(struct perf_stat));
107 return evsel->priv == NULL ? -ENOMEM : 0;
108}
109
110static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
111{
112 free(evsel->priv);
113 evsel->priv = NULL;
114}
115
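
These two helpers retire the fixed-size event_res_stats[MAX_COUNTERS][3] and event_scaled[MAX_COUNTERS] globals: per-event state now hangs off evsel->priv and lives exactly as long as the evsel is on the list. The same pattern with a hypothetical payload (my_priv and the two helper names are illustrative), assuming this tree's util/evsel.h:

struct my_priv {                        /* hypothetical payload */
        u64 nr_reads;
};

static int my_alloc_priv(struct perf_evsel *evsel)
{
        evsel->priv = zalloc(sizeof(struct my_priv));   /* zero-filled */
        return evsel->priv == NULL ? -ENOMEM : 0;
}

static void my_free_priv(struct perf_evsel *evsel)
{
        free(evsel->priv);
        evsel->priv = NULL;             /* no dangling pointer on reuse */
}

cmd_stat() below pairs the allocation with the out_free_fd unwind, so every exit path frees the same way.
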
99static void update_stats(struct stats *stats, u64 val) 116static void update_stats(struct stats *stats, u64 val)
100{ 117{
101 double delta; 118 double delta;
@@ -135,69 +152,38 @@ static double stddev_stats(struct stats *stats)
135 return sqrt(variance_mean); 152 return sqrt(variance_mean);
136} 153}
137 154
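
For reference, struct stats (n, mean, M2) is the running state of Welford's online algorithm. The bodies of update_stats() and avg_stats() are untouched by this patch and elided here, but assuming the usual perf implementation, the recurrences are:

\[
\delta = x_n - \mu_{n-1}, \qquad
\mu_n = \mu_{n-1} + \frac{\delta}{n}, \qquad
M_{2,n} = M_{2,n-1} + \delta\,(x_n - \mu_n)
\]
\[
\sigma^2 = \frac{M_{2,n}}{n-1}, \qquad
\mathrm{stddev\_stats} = \sqrt{\sigma^2 / n}
\]

so stddev_stats() returns the standard error of the mean, which is what the "( +- x% )" noise column prints.
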
138struct stats event_res_stats[MAX_COUNTERS][3]; 155struct stats runtime_nsecs_stats[MAX_NR_CPUS];
139struct stats runtime_nsecs_stats; 156struct stats runtime_cycles_stats[MAX_NR_CPUS];
157struct stats runtime_branches_stats[MAX_NR_CPUS];
140struct stats walltime_nsecs_stats; 158struct stats walltime_nsecs_stats;
141struct stats runtime_cycles_stats;
142struct stats runtime_branches_stats;
143 159
144#define MATCH_EVENT(t, c, counter) \ 160static int create_perf_stat_counter(struct perf_evsel *evsel)
145 (attrs[counter].type == PERF_TYPE_##t && \
146 attrs[counter].config == PERF_COUNT_##c)
147
148#define ERR_PERF_OPEN \
149"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
150
151static int create_perf_stat_counter(int counter)
152{ 161{
153 struct perf_event_attr *attr = attrs + counter; 162 struct perf_event_attr *attr = &evsel->attr;
154 int thread;
155 int ncreated = 0;
156 163
157 if (scale) 164 if (scale)
158 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 165 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
159 PERF_FORMAT_TOTAL_TIME_RUNNING; 166 PERF_FORMAT_TOTAL_TIME_RUNNING;
160 167
161 if (system_wide) { 168 if (system_wide)
162 int cpu; 169 return perf_evsel__open_per_cpu(evsel, cpus);
163 170
164 for (cpu = 0; cpu < nr_cpus; cpu++) { 171 attr->inherit = !no_inherit;
165 fd[cpu][counter][0] = sys_perf_event_open(attr, 172 if (target_pid == -1 && target_tid == -1) {
166 -1, cpumap[cpu], -1, 0); 173 attr->disabled = 1;
167 if (fd[cpu][counter][0] < 0) 174 attr->enable_on_exec = 1;
168 pr_debug(ERR_PERF_OPEN, counter,
169 fd[cpu][counter][0], strerror(errno));
170 else
171 ++ncreated;
172 }
173 } else {
174 attr->inherit = !no_inherit;
175 if (target_pid == -1 && target_tid == -1) {
176 attr->disabled = 1;
177 attr->enable_on_exec = 1;
178 }
179 for (thread = 0; thread < thread_num; thread++) {
180 fd[0][counter][thread] = sys_perf_event_open(attr,
181 all_tids[thread], -1, -1, 0);
182 if (fd[0][counter][thread] < 0)
183 pr_debug(ERR_PERF_OPEN, counter,
184 fd[0][counter][thread],
185 strerror(errno));
186 else
187 ++ncreated;
188 }
189 } 175 }
190 176
191 return ncreated; 177 return perf_evsel__open_per_thread(evsel, threads);
192} 178}
193 179
194/* 180/*
195 * Does the counter have nsecs as a unit? 181 * Does the counter have nsecs as a unit?
196 */ 182 */
197static inline int nsec_counter(int counter) 183static inline int nsec_counter(struct perf_evsel *evsel)
198{ 184{
199 if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || 185 if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
200 MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 186 perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
201 return 1; 187 return 1;
202 188
203 return 0; 189 return 0;
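
The rewritten create_perf_stat_counter() is now just a policy switch: system-wide opens one fd per CPU in the map, everything else opens one fd per thread, and a forked workload gets disabled + enable_on_exec counters so counting starts exactly at the child's exec(). An annotated restatement; open_counter is an illustrative name, everything else is taken from the hunk above (including the file-scope scale, system_wide, cpus, threads, no_inherit, target_pid and target_tid):

/* Annotated restatement of create_perf_stat_counter() above. */
static int open_counter(struct perf_evsel *evsel)
{
        struct perf_event_attr *attr = &evsel->attr;

        if (scale)      /* ask for enabled/running times so counts can be scaled */
                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                    PERF_FORMAT_TOTAL_TIME_RUNNING;

        if (system_wide)        /* one fd per CPU in the cpu_map */
                return perf_evsel__open_per_cpu(evsel, cpus);

        attr->inherit = !no_inherit;
        if (target_pid == -1 && target_tid == -1) {
                attr->disabled = 1;       /* created stopped ...               */
                attr->enable_on_exec = 1; /* ... and armed by the child's exec */
        }
        return perf_evsel__open_per_thread(evsel, threads); /* one fd per thread */
}
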
@@ -205,55 +191,19 @@ static inline int nsec_counter(int counter)
205 191
206/* 192/*
207 * Read out the results of a single counter: 193 * Read out the results of a single counter:
194 * aggregate counts across CPUs in system-wide mode
208 */ 195 */
209static void read_counter(int counter) 196static int read_counter_aggr(struct perf_evsel *counter)
210{ 197{
211 u64 count[3], single_count[3]; 198 struct perf_stat *ps = counter->priv;
212 int cpu; 199 u64 *count = counter->counts->aggr.values;
213 size_t res, nv; 200 int i;
214 int scaled;
215 int i, thread;
216
217 count[0] = count[1] = count[2] = 0;
218
219 nv = scale ? 3 : 1;
220 for (cpu = 0; cpu < nr_cpus; cpu++) {
221 for (thread = 0; thread < thread_num; thread++) {
222 if (fd[cpu][counter][thread] < 0)
223 continue;
224
225 res = read(fd[cpu][counter][thread],
226 single_count, nv * sizeof(u64));
227 assert(res == nv * sizeof(u64));
228
229 close(fd[cpu][counter][thread]);
230 fd[cpu][counter][thread] = -1;
231
232 count[0] += single_count[0];
233 if (scale) {
234 count[1] += single_count[1];
235 count[2] += single_count[2];
236 }
237 }
238 }
239
240 scaled = 0;
241 if (scale) {
242 if (count[2] == 0) {
243 event_scaled[counter] = -1;
244 count[0] = 0;
245 return;
246 }
247 201
248 if (count[2] < count[1]) { 202 if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0)
249 event_scaled[counter] = 1; 203 return -1;
250 count[0] = (unsigned long long)
251 ((double)count[0] * count[1] / count[2] + 0.5);
252 }
253 }
254 204
255 for (i = 0; i < 3; i++) 205 for (i = 0; i < 3; i++)
256 update_stats(&event_res_stats[counter][i], count[i]); 206 update_stats(&ps->res_stats[i], count[i]);
257 207
258 if (verbose) { 208 if (verbose) {
259 fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter), 209 fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
@@ -263,26 +213,51 @@ static void read_counter(int counter)
263 /* 213 /*
264 * Save the full runtime - to allow normalization during printout: 214 * Save the full runtime - to allow normalization during printout:
265 */ 215 */
266 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) 216 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
267 update_stats(&runtime_nsecs_stats, count[0]); 217 update_stats(&runtime_nsecs_stats[0], count[0]);
268 if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) 218 if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
269 update_stats(&runtime_cycles_stats, count[0]); 219 update_stats(&runtime_cycles_stats[0], count[0]);
270 if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) 220 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
271 update_stats(&runtime_branches_stats, count[0]); 221 update_stats(&runtime_branches_stats[0], count[0]);
222
223 return 0;
224}
225
226/*
227 * Read out the results of a single counter:
228 * do not aggregate counts across CPUs in system-wide mode
229 */
230static int read_counter(struct perf_evsel *counter)
231{
232 u64 *count;
233 int cpu;
234
235 for (cpu = 0; cpu < cpus->nr; cpu++) {
236 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
237 return -1;
238
239 count = counter->counts->cpu[cpu].values;
240
241 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
242 update_stats(&runtime_nsecs_stats[cpu], count[0]);
243 if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
244 update_stats(&runtime_cycles_stats[cpu], count[0]);
245 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
246 update_stats(&runtime_branches_stats[cpu], count[0]);
247 }
248
249 return 0;
272} 250}
273 251
274static int run_perf_stat(int argc __used, const char **argv) 252static int run_perf_stat(int argc __used, const char **argv)
275{ 253{
276 unsigned long long t0, t1; 254 unsigned long long t0, t1;
255 struct perf_evsel *counter;
277 int status = 0; 256 int status = 0;
278 int counter, ncreated = 0;
279 int child_ready_pipe[2], go_pipe[2]; 257 int child_ready_pipe[2], go_pipe[2];
280 const bool forks = (argc > 0); 258 const bool forks = (argc > 0);
281 char buf; 259 char buf;
282 260
283 if (!system_wide)
284 nr_cpus = 1;
285
286 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { 261 if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
287 perror("failed to create pipes"); 262 perror("failed to create pipes");
288 exit(1); 263 exit(1);
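
Both new read paths consume the (value, time_enabled, time_running) triple requested via PERF_FORMAT_TOTAL_TIME_*; the multiplexing extrapolation that the deleted loop open-coded is presumably what __perf_evsel__read() and __perf_evsel__read_on_cpu() now do internally. The rule, reconstructed from the removed lines above (scale_count is an illustrative name):

/*
 * Scaling rule from the deleted read_counter():
 *   scaled = -1: never scheduled, report <not counted>
 *   scaled =  1: multiplexed, value extrapolated
 *   scaled =  0: counted the whole time, value exact
 */
static u64 scale_count(u64 val, u64 enabled, u64 running, int *scaled)
{
        if (running == 0) {
                *scaled = -1;
                return 0;
        }
        if (running < enabled) {
                *scaled = 1;
                return (u64)((double)val * enabled / running + 0.5);
        }
        *scaled = 0;
        return val;
}
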
@@ -322,7 +297,7 @@ static int run_perf_stat(int argc __used, const char **argv)
322 } 297 }
323 298
324 if (target_tid == -1 && target_pid == -1 && !system_wide) 299 if (target_tid == -1 && target_pid == -1 && !system_wide)
325 all_tids[0] = child_pid; 300 threads->map[0] = child_pid;
326 301
327 /* 302 /*
328 * Wait for the child to be ready to exec. 303 * Wait for the child to be ready to exec.
@@ -334,16 +309,23 @@ static int run_perf_stat(int argc __used, const char **argv)
334 close(child_ready_pipe[0]); 309 close(child_ready_pipe[0]);
335 } 310 }
336 311
337 for (counter = 0; counter < nr_counters; counter++) 312 list_for_each_entry(counter, &evsel_list, node) {
338 ncreated += create_perf_stat_counter(counter); 313 if (create_perf_stat_counter(counter) < 0) {
339 314 if (errno == EPERM || errno == EACCES) {
340 if (ncreated == 0) { 315 error("You may not have permission to collect %sstats.\n"
341 pr_err("No permission to collect %sstats.\n" 316 "\t Consider tweaking"
342 "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n", 317 " /proc/sys/kernel/perf_event_paranoid or running as root.",
343 system_wide ? "system-wide " : ""); 318 system_wide ? "system-wide " : "");
344 if (child_pid != -1) 319 } else {
345 kill(child_pid, SIGTERM); 320 error("open_counter returned with %d (%s). "
346 return -1; 321 "/bin/dmesg may provide additional information.\n",
322 errno, strerror(errno));
323 }
324 if (child_pid != -1)
325 kill(child_pid, SIGTERM);
326 die("Not all events could be opened.\n");
327 return -1;
328 }
347 } 329 }
348 330
349 /* 331 /*
@@ -362,60 +344,97 @@ static int run_perf_stat(int argc __used, const char **argv)
362 344
363 update_stats(&walltime_nsecs_stats, t1 - t0); 345 update_stats(&walltime_nsecs_stats, t1 - t0);
364 346
365 for (counter = 0; counter < nr_counters; counter++) 347 if (no_aggr) {
366 read_counter(counter); 348 list_for_each_entry(counter, &evsel_list, node) {
349 read_counter(counter);
350 perf_evsel__close_fd(counter, cpus->nr, 1);
351 }
352 } else {
353 list_for_each_entry(counter, &evsel_list, node) {
354 read_counter_aggr(counter);
355 perf_evsel__close_fd(counter, cpus->nr, threads->nr);
356 }
357 }
367 358
368 return WEXITSTATUS(status); 359 return WEXITSTATUS(status);
369} 360}
370 361
371static void print_noise(int counter, double avg) 362static void print_noise(struct perf_evsel *evsel, double avg)
372{ 363{
364 struct perf_stat *ps;
365
373 if (run_count == 1) 366 if (run_count == 1)
374 return; 367 return;
375 368
369 ps = evsel->priv;
376 fprintf(stderr, " ( +- %7.3f%% )", 370 fprintf(stderr, " ( +- %7.3f%% )",
377 100 * stddev_stats(&event_res_stats[counter][0]) / avg); 371 100 * stddev_stats(&ps->res_stats[0]) / avg);
378} 372}
379 373
380static void nsec_printout(int counter, double avg) 374static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
381{ 375{
382 double msecs = avg / 1e6; 376 double msecs = avg / 1e6;
377 char cpustr[16] = { '\0', };
378 const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
383 379
384 fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter)); 380 if (no_aggr)
381 sprintf(cpustr, "CPU%*d%s",
382 csv_output ? 0 : -4,
383 cpus->map[cpu], csv_sep);
384
385 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
386
387 if (csv_output)
388 return;
385 389
386 if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { 390 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
387 fprintf(stderr, " # %10.3f CPUs ", 391 fprintf(stderr, " # %10.3f CPUs ",
388 avg / avg_stats(&walltime_nsecs_stats)); 392 avg / avg_stats(&walltime_nsecs_stats));
389 }
390} 393}
391 394
392static void abs_printout(int counter, double avg) 395static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
393{ 396{
394 double total, ratio = 0.0; 397 double total, ratio = 0.0;
398 char cpustr[16] = { '\0', };
399 const char *fmt;
400
401 if (csv_output)
402 fmt = "%s%.0f%s%s";
403 else if (big_num)
404 fmt = "%s%'18.0f%s%-24s";
405 else
406 fmt = "%s%18.0f%s%-24s";
395 407
396 if (big_num) 408 if (no_aggr)
397 fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter)); 409 sprintf(cpustr, "CPU%*d%s",
410 csv_output ? 0 : -4,
411 cpus->map[cpu], csv_sep);
398 else 412 else
399 fprintf(stderr, " %18.0f %-24s", avg, event_name(counter)); 413 cpu = 0;
414
415 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
400 416
401 if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { 417 if (csv_output)
402 total = avg_stats(&runtime_cycles_stats); 418 return;
419
420 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
421 total = avg_stats(&runtime_cycles_stats[cpu]);
403 422
404 if (total) 423 if (total)
405 ratio = avg / total; 424 ratio = avg / total;
406 425
407 fprintf(stderr, " # %10.3f IPC ", ratio); 426 fprintf(stderr, " # %10.3f IPC ", ratio);
408 } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && 427 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
409 runtime_branches_stats.n != 0) { 428 runtime_branches_stats[cpu].n != 0) {
410 total = avg_stats(&runtime_branches_stats); 429 total = avg_stats(&runtime_branches_stats[cpu]);
411 430
412 if (total) 431 if (total)
413 ratio = avg * 100 / total; 432 ratio = avg * 100 / total;
414 433
415 fprintf(stderr, " # %10.3f %% ", ratio); 434 fprintf(stderr, " # %10.3f %% ", ratio);
416 435
417 } else if (runtime_nsecs_stats.n != 0) { 436 } else if (runtime_nsecs_stats[cpu].n != 0) {
418 total = avg_stats(&runtime_nsecs_stats); 437 total = avg_stats(&runtime_nsecs_stats[cpu]);
419 438
420 if (total) 439 if (total)
421 ratio = 1000.0 * avg / total; 440 ratio = 1000.0 * avg / total;
@@ -426,30 +445,38 @@ static void abs_printout(int counter, double avg)
426 445
427/* 446/*
428 * Print out the results of a single counter: 447 * Print out the results of a single counter:
448 * aggregated counts in system-wide mode
429 */ 449 */
430static void print_counter(int counter) 450static void print_counter_aggr(struct perf_evsel *counter)
431{ 451{
432 double avg = avg_stats(&event_res_stats[counter][0]); 452 struct perf_stat *ps = counter->priv;
433 int scaled = event_scaled[counter]; 453 double avg = avg_stats(&ps->res_stats[0]);
454 int scaled = counter->counts->scaled;
434 455
435 if (scaled == -1) { 456 if (scaled == -1) {
436 fprintf(stderr, " %18s %-24s\n", 457 fprintf(stderr, "%*s%s%-24s\n",
437 "<not counted>", event_name(counter)); 458 csv_output ? 0 : 18,
459 "<not counted>", csv_sep, event_name(counter));
438 return; 460 return;
439 } 461 }
440 462
441 if (nsec_counter(counter)) 463 if (nsec_counter(counter))
442 nsec_printout(counter, avg); 464 nsec_printout(-1, counter, avg);
443 else 465 else
444 abs_printout(counter, avg); 466 abs_printout(-1, counter, avg);
467
468 if (csv_output) {
469 fputc('\n', stderr);
470 return;
471 }
445 472
446 print_noise(counter, avg); 473 print_noise(counter, avg);
447 474
448 if (scaled) { 475 if (scaled) {
449 double avg_enabled, avg_running; 476 double avg_enabled, avg_running;
450 477
451 avg_enabled = avg_stats(&event_res_stats[counter][1]); 478 avg_enabled = avg_stats(&ps->res_stats[1]);
452 avg_running = avg_stats(&event_res_stats[counter][2]); 479 avg_running = avg_stats(&ps->res_stats[2]);
453 480
454 fprintf(stderr, " (scaled from %.2f%%)", 481 fprintf(stderr, " (scaled from %.2f%%)",
455 100 * avg_running / avg_enabled); 482 100 * avg_running / avg_enabled);
@@ -458,40 +485,92 @@ static void print_counter(int counter)
458 fprintf(stderr, "\n"); 485 fprintf(stderr, "\n");
459} 486}
460 487
488/*
489 * Print out the results of a single counter:
490 * does not use aggregated counts in system-wide mode
491 */
492static void print_counter(struct perf_evsel *counter)
493{
494 u64 ena, run, val;
495 int cpu;
496
497 for (cpu = 0; cpu < cpus->nr; cpu++) {
498 val = counter->counts->cpu[cpu].val;
499 ena = counter->counts->cpu[cpu].ena;
500 run = counter->counts->cpu[cpu].run;
501 if (run == 0 || ena == 0) {
502 fprintf(stderr, "CPU%*d%s%*s%s%-24s",
503 csv_output ? 0 : -4,
504 cpus->map[cpu], csv_sep,
505 csv_output ? 0 : 18,
506 "<not counted>", csv_sep,
507 event_name(counter));
508
509 fprintf(stderr, "\n");
510 continue;
511 }
512
513 if (nsec_counter(counter))
514 nsec_printout(cpu, counter, val);
515 else
516 abs_printout(cpu, counter, val);
517
518 if (!csv_output) {
519 print_noise(counter, 1.0);
520
521 if (run != ena) {
522 fprintf(stderr, " (scaled from %.2f%%)",
523 100.0 * run / ena);
524 }
525 }
526 fprintf(stderr, "\n");
527 }
528}
529
461static void print_stat(int argc, const char **argv) 530static void print_stat(int argc, const char **argv)
462{ 531{
463 int i, counter; 532 struct perf_evsel *counter;
533 int i;
464 534
465 fflush(stdout); 535 fflush(stdout);
466 536
467 fprintf(stderr, "\n"); 537 if (!csv_output) {
468 fprintf(stderr, " Performance counter stats for "); 538 fprintf(stderr, "\n");
469 if(target_pid == -1 && target_tid == -1) { 539 fprintf(stderr, " Performance counter stats for ");
470 fprintf(stderr, "\'%s", argv[0]); 540 if(target_pid == -1 && target_tid == -1) {
471 for (i = 1; i < argc; i++) 541 fprintf(stderr, "\'%s", argv[0]);
472 fprintf(stderr, " %s", argv[i]); 542 for (i = 1; i < argc; i++)
473 } else if (target_pid != -1) 543 fprintf(stderr, " %s", argv[i]);
474 fprintf(stderr, "process id \'%d", target_pid); 544 } else if (target_pid != -1)
475 else 545 fprintf(stderr, "process id \'%d", target_pid);
476 fprintf(stderr, "thread id \'%d", target_tid); 546 else
477 547 fprintf(stderr, "thread id \'%d", target_tid);
478 fprintf(stderr, "\'"); 548
479 if (run_count > 1) 549 fprintf(stderr, "\'");
480 fprintf(stderr, " (%d runs)", run_count); 550 if (run_count > 1)
481 fprintf(stderr, ":\n\n"); 551 fprintf(stderr, " (%d runs)", run_count);
552 fprintf(stderr, ":\n\n");
553 }
482 554
483 for (counter = 0; counter < nr_counters; counter++) 555 if (no_aggr) {
484 print_counter(counter); 556 list_for_each_entry(counter, &evsel_list, node)
557 print_counter(counter);
558 } else {
559 list_for_each_entry(counter, &evsel_list, node)
560 print_counter_aggr(counter);
561 }
485 562
486 fprintf(stderr, "\n"); 563 if (!csv_output) {
487 fprintf(stderr, " %18.9f seconds time elapsed", 564 fprintf(stderr, "\n");
488 avg_stats(&walltime_nsecs_stats)/1e9); 565 fprintf(stderr, " %18.9f seconds time elapsed",
489 if (run_count > 1) { 566 avg_stats(&walltime_nsecs_stats)/1e9);
490 fprintf(stderr, " ( +- %7.3f%% )", 567 if (run_count > 1) {
568 fprintf(stderr, " ( +- %7.3f%% )",
491 100*stddev_stats(&walltime_nsecs_stats) / 569 100*stddev_stats(&walltime_nsecs_stats) /
492 avg_stats(&walltime_nsecs_stats)); 570 avg_stats(&walltime_nsecs_stats));
571 }
572 fprintf(stderr, "\n\n");
493 } 573 }
494 fprintf(stderr, "\n\n");
495} 574}
496 575
497static volatile int signr = -1; 576static volatile int signr = -1;
@@ -521,6 +600,13 @@ static const char * const stat_usage[] = {
521 NULL 600 NULL
522}; 601};
523 602
603static int stat__set_big_num(const struct option *opt __used,
604 const char *s __used, int unset)
605{
606 big_num_opt = unset ? 0 : 1;
607 return 0;
608}
609
524static const struct option options[] = { 610static const struct option options[] = {
525 OPT_CALLBACK('e', "event", NULL, "event", 611 OPT_CALLBACK('e', "event", NULL, "event",
526 "event selector. use 'perf list' to list available events", 612 "event selector. use 'perf list' to list available events",
@@ -541,64 +627,96 @@ static const struct option options[] = {
541 "repeat command and print average + stddev (max: 100)"), 627 "repeat command and print average + stddev (max: 100)"),
542 OPT_BOOLEAN('n', "null", &null_run, 628 OPT_BOOLEAN('n', "null", &null_run,
543 "null run - dont start any counters"), 629 "null run - dont start any counters"),
544 OPT_BOOLEAN('B', "big-num", &big_num, 630 OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
545 "print large numbers with thousands\' separators"), 631 "print large numbers with thousands\' separators",
632 stat__set_big_num),
546 OPT_STRING('C', "cpu", &cpu_list, "cpu", 633 OPT_STRING('C', "cpu", &cpu_list, "cpu",
547 "list of cpus to monitor in system-wide"), 634 "list of cpus to monitor in system-wide"),
635 OPT_BOOLEAN('A', "no-aggr", &no_aggr,
636 "disable CPU count aggregation"),
637 OPT_STRING('x', "field-separator", &csv_sep, "separator",
638 "print counts with custom separator"),
548 OPT_END() 639 OPT_END()
549}; 640};
550 641
551int cmd_stat(int argc, const char **argv, const char *prefix __used) 642int cmd_stat(int argc, const char **argv, const char *prefix __used)
552{ 643{
553 int status; 644 struct perf_evsel *pos;
554 int i,j; 645 int status = -ENOMEM;
555 646
556 setlocale(LC_ALL, ""); 647 setlocale(LC_ALL, "");
557 648
558 argc = parse_options(argc, argv, options, stat_usage, 649 argc = parse_options(argc, argv, options, stat_usage,
559 PARSE_OPT_STOP_AT_NON_OPTION); 650 PARSE_OPT_STOP_AT_NON_OPTION);
651
652 if (csv_sep)
653 csv_output = true;
654 else
655 csv_sep = DEFAULT_SEPARATOR;
656
657 /*
658 * let the spreadsheet do the pretty-printing
659 */
660 if (csv_output) {
661 /* User explicitly passed -B? */
662 if (big_num_opt == 1) {
663 fprintf(stderr, "-B option not supported with -x\n");
664 usage_with_options(stat_usage, options);
665 } else /* Nope, so disable big number formatting */
666 big_num = false;
667 } else if (big_num_opt == 0) /* User passed --no-big-num */
668 big_num = false;
669
560 if (!argc && target_pid == -1 && target_tid == -1) 670 if (!argc && target_pid == -1 && target_tid == -1)
561 usage_with_options(stat_usage, options); 671 usage_with_options(stat_usage, options);
562 if (run_count <= 0) 672 if (run_count <= 0)
563 usage_with_options(stat_usage, options); 673 usage_with_options(stat_usage, options);
564 674
675 /* no_aggr is for system-wide only */
676 if (no_aggr && !system_wide)
677 usage_with_options(stat_usage, options);
678
565 /* Set attrs and nr_counters if no event is selected and !null_run */ 679 /* Set attrs and nr_counters if no event is selected and !null_run */
566 if (!null_run && !nr_counters) { 680 if (!null_run && !nr_counters) {
567 memcpy(attrs, default_attrs, sizeof(default_attrs)); 681 size_t c;
682
568 nr_counters = ARRAY_SIZE(default_attrs); 683 nr_counters = ARRAY_SIZE(default_attrs);
684
685 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
686 pos = perf_evsel__new(default_attrs[c].type,
687 default_attrs[c].config,
688 nr_counters);
689 if (pos == NULL)
690 goto out;
691 list_add(&pos->node, &evsel_list);
692 }
569 } 693 }
570 694
571 if (system_wide) 695 if (target_pid != -1)
572 nr_cpus = read_cpu_map(cpu_list); 696 target_tid = target_pid;
573 else
574 nr_cpus = 1;
575 697
576 if (nr_cpus < 1) 698 threads = thread_map__new(target_pid, target_tid);
699 if (threads == NULL) {
700 pr_err("Problems finding threads to monitor\n");
577 usage_with_options(stat_usage, options); 701 usage_with_options(stat_usage, options);
702 }
578 703
579 if (target_pid != -1) { 704 if (system_wide)
580 target_tid = target_pid; 705 cpus = cpu_map__new(cpu_list);
581 thread_num = find_all_tid(target_pid, &all_tids); 706 else
582 if (thread_num <= 0) { 707 cpus = cpu_map__dummy_new();
583 fprintf(stderr, "Can't find all threads of pid %d\n",
584 target_pid);
585 usage_with_options(stat_usage, options);
586 }
587 } else {
588 all_tids=malloc(sizeof(pid_t));
589 if (!all_tids)
590 return -ENOMEM;
591 708
592 all_tids[0] = target_tid; 709 if (cpus == NULL) {
593 thread_num = 1; 710 perror("failed to parse CPUs map");
711 usage_with_options(stat_usage, options);
712 return -1;
594 } 713 }
595 714
596 for (i = 0; i < MAX_NR_CPUS; i++) { 715 list_for_each_entry(pos, &evsel_list, node) {
597 for (j = 0; j < MAX_COUNTERS; j++) { 716 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
598 fd[i][j] = malloc(sizeof(int)*thread_num); 717 perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
599 if (!fd[i][j]) 718 perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
600 return -ENOMEM; 719 goto out_free_fd;
601 }
602 } 720 }
603 721
604 /* 722 /*
@@ -621,6 +739,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
621 739
622 if (status != -1) 740 if (status != -1)
623 print_stat(argc, argv); 741 print_stat(argc, argv);
624 742out_free_fd:
743 list_for_each_entry(pos, &evsel_list, node)
744 perf_evsel__free_stat_priv(pos);
745out:
746 thread_map__delete(threads);
747 threads = NULL;
625 return status; 748 return status;
626} 749}
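
Taken together, the new cmd_stat() setup boils down to: build the thread map, build (or dummy out) the CPU map, then size every evsel's fds and counts from the two. A condensed sketch using only calls visible in this patch and the file-scope variables declared above; setup_maps_and_evsels is an illustrative name and the unwinding is trimmed to bare returns:

static int setup_maps_and_evsels(void)
{
        struct perf_evsel *pos;

        threads = thread_map__new(target_pid, target_tid);
        if (threads == NULL)
                return -1;

        cpus = system_wide ? cpu_map__new(cpu_list) : cpu_map__dummy_new();
        if (cpus == NULL)
                return -1;

        /* size per-event state from the two maps */
        list_for_each_entry(pos, &evsel_list, node) {
                if (perf_evsel__alloc_stat_priv(pos) < 0 ||
                    perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
                    perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
                        return -ENOMEM;
        }
        return 0;
}
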
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 035b9fa063a9..1c984342a579 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -119,10 +119,16 @@ static int test__vmlinux_matches_kallsyms(void)
119 * end addresses too. 119 * end addresses too.
120 */ 120 */
121 for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { 121 for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
122 struct symbol *pair; 122 struct symbol *pair, *first_pair;
123 bool backwards = true;
123 124
124 sym = rb_entry(nd, struct symbol, rb_node); 125 sym = rb_entry(nd, struct symbol, rb_node);
125 pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL); 126
127 if (sym->start == sym->end)
128 continue;
129
130 first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
131 pair = first_pair;
126 132
127 if (pair && pair->start == sym->start) { 133 if (pair && pair->start == sym->start) {
128next_pair: 134next_pair:
@@ -143,8 +149,10 @@ next_pair:
143 pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n", 149 pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n",
144 sym->start, sym->name, sym->end, pair->end); 150 sym->start, sym->name, sym->end, pair->end);
145 } else { 151 } else {
146 struct rb_node *nnd = rb_prev(&pair->rb_node); 152 struct rb_node *nnd;
147 153detour:
154 nnd = backwards ? rb_prev(&pair->rb_node) :
155 rb_next(&pair->rb_node);
148 if (nnd) { 156 if (nnd) {
149 struct symbol *next = rb_entry(nnd, struct symbol, rb_node); 157 struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
150 158
@@ -153,6 +161,13 @@ next_pair:
153 goto next_pair; 161 goto next_pair;
154 } 162 }
155 } 163 }
164
165 if (backwards) {
166 backwards = false;
167 pair = first_pair;
168 goto detour;
169 }
170
156 pr_debug("%#Lx: diff name v: %s k: %s\n", 171 pr_debug("%#Lx: diff name v: %s k: %s\n",
157 sym->start, sym->name, pair->name); 172 sym->start, sym->name, pair->name);
158 } 173 }
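
The old code only looked at the rbtree predecessor when a kallsyms symbol shared a start address but not a name; the detour label now also retries forward from the first match, catching aliases that sort after the vmlinux symbol. A simplified, single-step restatement of that search order (the real loop above can keep stepping via next_pair); find_alias is an illustrative name and struct symbol/rb_node are as used in this file:

static struct symbol *find_alias(struct symbol *first_pair, u64 start)
{
        struct rb_node *nnd = rb_prev(&first_pair->rb_node);
        struct symbol *s;

        if (nnd) {
                s = rb_entry(nnd, struct symbol, rb_node);
                if (s->start == start)
                        return s;       /* alias sorted just before the match */
        }

        nnd = rb_next(&first_pair->rb_node);
        if (nnd) {
                s = rb_entry(nnd, struct symbol, rb_node);
                if (s->start == start)
                        return s;       /* alias sorted just after it */
        }

        return NULL;    /* no alias; the caller reports a name mismatch */
}
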
@@ -219,6 +234,89 @@ out:
219 return err; 234 return err;
220} 235}
221 236
237#include "util/evsel.h"
238#include <sys/types.h>
239
240static int trace_event__id(const char *event_name)
241{
242 char *filename;
243 int err = -1, fd;
244
245 if (asprintf(&filename,
246 "/sys/kernel/debug/tracing/events/syscalls/%s/id",
247 event_name) < 0)
248 return -1;
249
250 fd = open(filename, O_RDONLY);
251 if (fd >= 0) {
252 char id[16];
253 if (read(fd, id, sizeof(id)) > 0)
254 err = atoi(id);
255 close(fd);
256 }
257
258 free(filename);
259 return err;
260}
261
262static int test__open_syscall_event(void)
263{
264 int err = -1, fd;
265 struct thread_map *threads;
266 struct perf_evsel *evsel;
267 unsigned int nr_open_calls = 111, i;
268 int id = trace_event__id("sys_enter_open");
269
270 if (id < 0) {
271 pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
272 return -1;
273 }
274
275 threads = thread_map__new(-1, getpid());
276 if (threads == NULL) {
277 pr_debug("thread_map__new\n");
278 return -1;
279 }
280
281 evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
282 if (evsel == NULL) {
283 pr_debug("perf_evsel__new\n");
284 goto out_thread_map_delete;
285 }
286
287 if (perf_evsel__open_per_thread(evsel, threads) < 0) {
288 pr_debug("failed to open counter: %s, "
289 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
290 strerror(errno));
291 goto out_evsel_delete;
292 }
293
294 for (i = 0; i < nr_open_calls; ++i) {
295 fd = open("/etc/passwd", O_RDONLY);
296 close(fd);
297 }
298
299 if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
300 pr_debug("perf_evsel__read_on_cpu\n");
301 goto out_close_fd;
302 }
303
304 if (evsel->counts->cpu[0].val != nr_open_calls) {
305 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %Ld\n",
306 nr_open_calls, evsel->counts->cpu[0].val);
307 goto out_close_fd;
308 }
309
310 err = 0;
311out_close_fd:
312 perf_evsel__close_fd(evsel, 1, threads->nr);
313out_evsel_delete:
314 perf_evsel__delete(evsel);
315out_thread_map_delete:
316 thread_map__delete(threads);
317 return err;
318}
319
222static struct test { 320static struct test {
223 const char *desc; 321 const char *desc;
224 int (*func)(void); 322 int (*func)(void);
@@ -228,6 +326,10 @@ static struct test {
228 .func = test__vmlinux_matches_kallsyms, 326 .func = test__vmlinux_matches_kallsyms,
229 }, 327 },
230 { 328 {
329 .desc = "detect open syscall event",
330 .func = test__open_syscall_event,
331 },
332 {
231 .func = NULL, 333 .func = NULL,
232 }, 334 },
233}; 335};
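
New checks register themselves by taking a slot in the NULL-terminated tests[] table shown above; nothing else needs wiring. The shape, with the entry this patch adds plus one purely hypothetical extra (test__frobnicate does not exist anywhere):

static struct test {
        const char *desc;
        int (*func)(void);
} tests[] = {
        {
                .desc = "detect open syscall event",
                .func = test__open_syscall_event,
        },
        {
                .desc = "frobnicate the thingamajig", /* hypothetical */
                .func = test__frobnicate,
        },
        {
                .func = NULL,           /* sentinel: stops the runner */
        },
};
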
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 9bcc38f0b706..746cf03cb05d 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -32,6 +32,10 @@
32#include "util/session.h" 32#include "util/session.h"
33#include "util/svghelper.h" 33#include "util/svghelper.h"
34 34
35#define SUPPORT_OLD_POWER_EVENTS 1
36#define PWR_EVENT_EXIT -1
37
38
35static char const *input_name = "perf.data"; 39static char const *input_name = "perf.data";
36static char const *output_name = "output.svg"; 40static char const *output_name = "output.svg";
37 41
@@ -272,19 +276,22 @@ static int cpus_cstate_state[MAX_CPUS];
272static u64 cpus_pstate_start_times[MAX_CPUS]; 276static u64 cpus_pstate_start_times[MAX_CPUS];
273static u64 cpus_pstate_state[MAX_CPUS]; 277static u64 cpus_pstate_state[MAX_CPUS];
274 278
275static int process_comm_event(event_t *event, struct perf_session *session __used) 279static int process_comm_event(event_t *event, struct sample_data *sample __used,
280 struct perf_session *session __used)
276{ 281{
277 pid_set_comm(event->comm.tid, event->comm.comm); 282 pid_set_comm(event->comm.tid, event->comm.comm);
278 return 0; 283 return 0;
279} 284}
280 285
281static int process_fork_event(event_t *event, struct perf_session *session __used) 286static int process_fork_event(event_t *event, struct sample_data *sample __used,
287 struct perf_session *session __used)
282{ 288{
283 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); 289 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
284 return 0; 290 return 0;
285} 291}
286 292
287static int process_exit_event(event_t *event, struct perf_session *session __used) 293static int process_exit_event(event_t *event, struct sample_data *sample __used,
294 struct perf_session *session __used)
288{ 295{
289 pid_exit(event->fork.pid, event->fork.time); 296 pid_exit(event->fork.pid, event->fork.time);
290 return 0; 297 return 0;
@@ -298,12 +305,21 @@ struct trace_entry {
298 int lock_depth; 305 int lock_depth;
299}; 306};
300 307
301struct power_entry { 308#ifdef SUPPORT_OLD_POWER_EVENTS
309static int use_old_power_events;
310struct power_entry_old {
302 struct trace_entry te; 311 struct trace_entry te;
303 u64 type; 312 u64 type;
304 u64 value; 313 u64 value;
305 u64 cpu_id; 314 u64 cpu_id;
306}; 315};
316#endif
317
318struct power_processor_entry {
319 struct trace_entry te;
320 u32 state;
321 u32 cpu_id;
322};
307 323
308#define TASK_COMM_LEN 16 324#define TASK_COMM_LEN 16
309struct wakeup_entry { 325struct wakeup_entry {
@@ -470,48 +486,65 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
470} 486}
471 487
472 488
473static int process_sample_event(event_t *event, struct perf_session *session) 489static int process_sample_event(event_t *event __used,
490 struct sample_data *sample,
491 struct perf_session *session)
474{ 492{
475 struct sample_data data;
476 struct trace_entry *te; 493 struct trace_entry *te;
477 494
478 memset(&data, 0, sizeof(data));
479
480 event__parse_sample(event, session->sample_type, &data);
481
482 if (session->sample_type & PERF_SAMPLE_TIME) { 495 if (session->sample_type & PERF_SAMPLE_TIME) {
483 if (!first_time || first_time > data.time) 496 if (!first_time || first_time > sample->time)
484 first_time = data.time; 497 first_time = sample->time;
485 if (last_time < data.time) 498 if (last_time < sample->time)
486 last_time = data.time; 499 last_time = sample->time;
487 } 500 }
488 501
489 te = (void *)data.raw_data; 502 te = (void *)sample->raw_data;
490 if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) { 503 if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
491 char *event_str; 504 char *event_str;
492 struct power_entry *pe; 505#ifdef SUPPORT_OLD_POWER_EVENTS
493 506 struct power_entry_old *peo;
494 pe = (void *)te; 507 peo = (void *)te;
495 508#endif
496 event_str = perf_header__find_event(te->type); 509 event_str = perf_header__find_event(te->type);
497 510
498 if (!event_str) 511 if (!event_str)
499 return 0; 512 return 0;
500 513
501 if (strcmp(event_str, "power:power_start") == 0) 514 if (strcmp(event_str, "power:cpu_idle") == 0) {
502 c_state_start(pe->cpu_id, data.time, pe->value); 515 struct power_processor_entry *ppe = (void *)te;
516 if (ppe->state == (u32)PWR_EVENT_EXIT)
517 c_state_end(ppe->cpu_id, sample->time);
518 else
519 c_state_start(ppe->cpu_id, sample->time,
520 ppe->state);
521 }
522 else if (strcmp(event_str, "power:cpu_frequency") == 0) {
523 struct power_processor_entry *ppe = (void *)te;
524 p_state_change(ppe->cpu_id, sample->time, ppe->state);
525 }
526
527 else if (strcmp(event_str, "sched:sched_wakeup") == 0)
528 sched_wakeup(sample->cpu, sample->time, sample->pid, te);
503 529
504 if (strcmp(event_str, "power:power_end") == 0) 530 else if (strcmp(event_str, "sched:sched_switch") == 0)
505 c_state_end(pe->cpu_id, data.time); 531 sched_switch(sample->cpu, sample->time, te);
506 532
507 if (strcmp(event_str, "power:power_frequency") == 0) 533#ifdef SUPPORT_OLD_POWER_EVENTS
508 p_state_change(pe->cpu_id, data.time, pe->value); 534 if (use_old_power_events) {
535 if (strcmp(event_str, "power:power_start") == 0)
536 c_state_start(peo->cpu_id, sample->time,
537 peo->value);
509 538
510 if (strcmp(event_str, "sched:sched_wakeup") == 0) 539 else if (strcmp(event_str, "power:power_end") == 0)
511 sched_wakeup(data.cpu, data.time, data.pid, te); 540 c_state_end(sample->cpu, sample->time);
512 541
513 if (strcmp(event_str, "sched:sched_switch") == 0) 542 else if (strcmp(event_str,
514 sched_switch(data.cpu, data.time, te); 543 "power:power_frequency") == 0)
544 p_state_change(peo->cpu_id, sample->time,
545 peo->value);
546 }
547#endif
515 } 548 }
516 return 0; 549 return 0;
517} 550}
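
The dispatch above reflects the tracepoint migration: the old power:power_{start,end,frequency} events carried 64-bit type/value/cpu_id payloads (struct power_entry_old), while the new power:cpu_idle and power:cpu_frequency share the 32-bit state/cpu_id layout, with idle exit folded into the same event as state == (u32)PWR_EVENT_EXIT instead of a separate end tracepoint. The sentinel decode isolated; handle_cpu_idle is an illustrative name:

/* Decoding power:cpu_idle: one event stream, exit marked by a sentinel. */
static void handle_cpu_idle(struct power_processor_entry *ppe, u64 timestamp)
{
        if (ppe->state == (u32)PWR_EVENT_EXIT)  /* -1 as u32: leaving idle */
                c_state_end(ppe->cpu_id, timestamp);
        else                                    /* entering C-state ppe->state */
                c_state_start(ppe->cpu_id, timestamp, ppe->state);
}
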
@@ -937,7 +970,8 @@ static struct perf_event_ops event_ops = {
937 970
938static int __cmd_timechart(void) 971static int __cmd_timechart(void)
939{ 972{
940 struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false); 973 struct perf_session *session = perf_session__new(input_name, O_RDONLY,
974 0, false, &event_ops);
941 int ret = -EINVAL; 975 int ret = -EINVAL;
942 976
943 if (session == NULL) 977 if (session == NULL)
@@ -968,7 +1002,8 @@ static const char * const timechart_usage[] = {
968 NULL 1002 NULL
969}; 1003};
970 1004
971static const char *record_args[] = { 1005#ifdef SUPPORT_OLD_POWER_EVENTS
1006static const char * const record_old_args[] = {
972 "record", 1007 "record",
973 "-a", 1008 "-a",
974 "-R", 1009 "-R",
@@ -980,16 +1015,43 @@ static const char *record_args[] = {
980 "-e", "sched:sched_wakeup", 1015 "-e", "sched:sched_wakeup",
981 "-e", "sched:sched_switch", 1016 "-e", "sched:sched_switch",
982}; 1017};
1018#endif
1019
1020static const char * const record_new_args[] = {
1021 "record",
1022 "-a",
1023 "-R",
1024 "-f",
1025 "-c", "1",
1026 "-e", "power:cpu_frequency",
1027 "-e", "power:cpu_idle",
1028 "-e", "sched:sched_wakeup",
1029 "-e", "sched:sched_switch",
1030};
983 1031
984static int __cmd_record(int argc, const char **argv) 1032static int __cmd_record(int argc, const char **argv)
985{ 1033{
986 unsigned int rec_argc, i, j; 1034 unsigned int rec_argc, i, j;
987 const char **rec_argv; 1035 const char **rec_argv;
1036 const char * const *record_args = record_new_args;
1037 unsigned int record_elems = ARRAY_SIZE(record_new_args);
1038
1039#ifdef SUPPORT_OLD_POWER_EVENTS
1040 if (!is_valid_tracepoint("power:cpu_idle") &&
1041 is_valid_tracepoint("power:power_start")) {
1042 use_old_power_events = 1;
1043 record_args = record_old_args;
1044 record_elems = ARRAY_SIZE(record_old_args);
1045 }
1046#endif
988 1047
989 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1048 rec_argc = record_elems + argc - 1;
990 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1049 rec_argv = calloc(rec_argc + 1, sizeof(char *));
991 1050
992 for (i = 0; i < ARRAY_SIZE(record_args); i++) 1051 if (rec_argv == NULL)
1052 return -ENOMEM;
1053
1054 for (i = 0; i < record_elems; i++)
993 rec_argv[i] = strdup(record_args[i]); 1055 rec_argv[i] = strdup(record_args[i]);
994 1056
995 for (j = 1; j < (unsigned int)argc; j++, i++) 1057 for (j = 1; j < (unsigned int)argc; j++, i++)
@@ -1018,6 +1080,8 @@ static const struct option options[] = {
1018 OPT_CALLBACK('p', "process", NULL, "process", 1080 OPT_CALLBACK('p', "process", NULL, "process",
1019 "process selector. Pass a pid or process name.", 1081 "process selector. Pass a pid or process name.",
1020 parse_process), 1082 parse_process),
1083 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
1084 "Look for files with symbols relative to this directory"),
1021 OPT_END() 1085 OPT_END()
1022}; 1086};
1023 1087
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dd625808c2a5..1e67ab9c7ebc 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -21,6 +21,7 @@
21#include "perf.h" 21#include "perf.h"
22 22
23#include "util/color.h" 23#include "util/color.h"
24#include "util/evsel.h"
24#include "util/session.h" 25#include "util/session.h"
25#include "util/symbol.h" 26#include "util/symbol.h"
26#include "util/thread.h" 27#include "util/thread.h"
@@ -29,6 +30,7 @@
29#include "util/parse-options.h" 30#include "util/parse-options.h"
30#include "util/parse-events.h" 31#include "util/parse-events.h"
31#include "util/cpumap.h" 32#include "util/cpumap.h"
33#include "util/xyarray.h"
32 34
33#include "util/debug.h" 35#include "util/debug.h"
34 36
@@ -55,7 +57,7 @@
55#include <linux/unistd.h> 57#include <linux/unistd.h>
56#include <linux/types.h> 58#include <linux/types.h>
57 59
58static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 60#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
59 61
60static bool system_wide = false; 62static bool system_wide = false;
61 63
@@ -66,10 +68,9 @@ static int print_entries;
66 68
67static int target_pid = -1; 69static int target_pid = -1;
68static int target_tid = -1; 70static int target_tid = -1;
69static pid_t *all_tids = NULL; 71static struct thread_map *threads;
70static int thread_num = 0;
71static bool inherit = false; 72static bool inherit = false;
72static int nr_cpus = 0; 73static struct cpu_map *cpus;
73static int realtime_prio = 0; 74static int realtime_prio = 0;
74static bool group = false; 75static bool group = false;
75static unsigned int page_size; 76static unsigned int page_size;
@@ -100,6 +101,7 @@ struct sym_entry *sym_filter_entry = NULL;
100struct sym_entry *sym_filter_entry_sched = NULL; 101struct sym_entry *sym_filter_entry_sched = NULL;
101static int sym_pcnt_filter = 5; 102static int sym_pcnt_filter = 5;
102static int sym_counter = 0; 103static int sym_counter = 0;
104static struct perf_evsel *sym_evsel = NULL;
103static int display_weighted = -1; 105static int display_weighted = -1;
104static const char *cpu_list; 106static const char *cpu_list;
105 107
@@ -353,7 +355,7 @@ static void show_details(struct sym_entry *syme)
353 return; 355 return;
354 356
355 symbol = sym_entry__symbol(syme); 357 symbol = sym_entry__symbol(syme);
356 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 358 printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name);
357 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 359 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
358 360
359 pthread_mutex_lock(&syme->src->lock); 361 pthread_mutex_lock(&syme->src->lock);
@@ -460,7 +462,8 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
460static void print_sym_table(void) 462static void print_sym_table(void)
461{ 463{
462 int printed = 0, j; 464 int printed = 0, j;
463 int counter, snap = !display_weighted ? sym_counter : 0; 465 struct perf_evsel *counter;
466 int snap = !display_weighted ? sym_counter : 0;
464 float samples_per_sec = samples/delay_secs; 467 float samples_per_sec = samples/delay_secs;
465 float ksamples_per_sec = kernel_samples/delay_secs; 468 float ksamples_per_sec = kernel_samples/delay_secs;
466 float us_samples_per_sec = (us_samples)/delay_secs; 469 float us_samples_per_sec = (us_samples)/delay_secs;
@@ -532,7 +535,9 @@ static void print_sym_table(void)
532 } 535 }
533 536
534 if (nr_counters == 1 || !display_weighted) { 537 if (nr_counters == 1 || !display_weighted) {
535 printf("%Ld", (u64)attrs[0].sample_period); 538 struct perf_evsel *first;
539 first = list_entry(evsel_list.next, struct perf_evsel, node);
540 printf("%Ld", first->attr.sample_period);
536 if (freq) 541 if (freq)
537 printf("Hz "); 542 printf("Hz ");
538 else 543 else
@@ -540,9 +545,9 @@ static void print_sym_table(void)
540 } 545 }
541 546
542 if (!display_weighted) 547 if (!display_weighted)
543 printf("%s", event_name(sym_counter)); 548 printf("%s", event_name(sym_evsel));
544 else for (counter = 0; counter < nr_counters; counter++) { 549 else list_for_each_entry(counter, &evsel_list, node) {
545 if (counter) 550 if (counter->idx)
546 printf("/"); 551 printf("/");
547 552
548 printf("%s", event_name(counter)); 553 printf("%s", event_name(counter));
@@ -558,12 +563,12 @@ static void print_sym_table(void)
558 printf(" (all"); 563 printf(" (all");
559 564
560 if (cpu_list) 565 if (cpu_list)
561 printf(", CPU%s: %s)\n", nr_cpus > 1 ? "s" : "", cpu_list); 566 printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
562 else { 567 else {
563 if (target_tid != -1) 568 if (target_tid != -1)
564 printf(")\n"); 569 printf(")\n");
565 else 570 else
566 printf(", %d CPU%s)\n", nr_cpus, nr_cpus > 1 ? "s" : ""); 571 printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
567 } 572 }
568 573
569 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 574 printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
@@ -739,7 +744,7 @@ static void print_mapped_keys(void)
739 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 744 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
740 745
741 if (nr_counters > 1) 746 if (nr_counters > 1)
742 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); 747 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel));
743 748
744 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 749 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
745 750
@@ -826,19 +831,23 @@ static void handle_keypress(struct perf_session *session, int c)
826 break; 831 break;
827 case 'E': 832 case 'E':
828 if (nr_counters > 1) { 833 if (nr_counters > 1) {
829 int i;
830
831 fprintf(stderr, "\nAvailable events:"); 834 fprintf(stderr, "\nAvailable events:");
832 for (i = 0; i < nr_counters; i++) 835
833 fprintf(stderr, "\n\t%d %s", i, event_name(i)); 836 list_for_each_entry(sym_evsel, &evsel_list, node)
837 fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel));
834 838
835 prompt_integer(&sym_counter, "Enter details event counter"); 839 prompt_integer(&sym_counter, "Enter details event counter");
836 840
837 if (sym_counter >= nr_counters) { 841 if (sym_counter >= nr_counters) {
838 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); 842 sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
839 sym_counter = 0; 843 sym_counter = 0;
844 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel));
840 sleep(1); 845 sleep(1);
846 break;
841 } 847 }
848 list_for_each_entry(sym_evsel, &evsel_list, node)
849 if (sym_evsel->idx == sym_counter)
850 break;
842 } else sym_counter = 0; 851 } else sym_counter = 0;
843 break; 852 break;
844 case 'f': 853 case 'f':
@@ -977,12 +986,13 @@ static int symbol_filter(struct map *map, struct symbol *sym)
977} 986}
978 987
979static void event__process_sample(const event_t *self, 988static void event__process_sample(const event_t *self,
980 struct perf_session *session, int counter) 989 struct sample_data *sample,
990 struct perf_session *session,
991 struct perf_evsel *evsel)
981{ 992{
982 u64 ip = self->ip.ip; 993 u64 ip = self->ip.ip;
983 struct sym_entry *syme; 994 struct sym_entry *syme;
984 struct addr_location al; 995 struct addr_location al;
985 struct sample_data data;
986 struct machine *machine; 996 struct machine *machine;
987 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 997 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
988 998
@@ -1025,7 +1035,7 @@ static void event__process_sample(const event_t *self,
1025 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 1035 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
1026 exact_samples++; 1036 exact_samples++;
1027 1037
1028 if (event__preprocess_sample(self, session, &al, &data, 1038 if (event__preprocess_sample(self, session, &al, sample,
1029 symbol_filter) < 0 || 1039 symbol_filter) < 0 ||
1030 al.filtered) 1040 al.filtered)
1031 return; 1041 return;
@@ -1071,9 +1081,9 @@ static void event__process_sample(const event_t *self,
1071 1081
1072 syme = symbol__priv(al.sym); 1082 syme = symbol__priv(al.sym);
1073 if (!syme->skip) { 1083 if (!syme->skip) {
1074 syme->count[counter]++; 1084 syme->count[evsel->idx]++;
1075 syme->origin = origin; 1085 syme->origin = origin;
1076 record_precise_ip(syme, counter, ip); 1086 record_precise_ip(syme, evsel->idx, ip);
1077 pthread_mutex_lock(&active_symbols_lock); 1087 pthread_mutex_lock(&active_symbols_lock);
1078 if (list_empty(&syme->node) || !syme->node.next) 1088 if (list_empty(&syme->node) || !syme->node.next)
1079 __list_insert_active_sym(syme); 1089 __list_insert_active_sym(syme);
@@ -1082,12 +1092,24 @@ static void event__process_sample(const event_t *self,
1082} 1092}
1083 1093
1084struct mmap_data { 1094struct mmap_data {
1085 int counter;
1086 void *base; 1095 void *base;
1087 int mask; 1096 int mask;
1088 unsigned int prev; 1097 unsigned int prev;
1089}; 1098};
1090 1099
1100static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
1101 int ncpus, int nthreads)
1102{
1103 evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
1104 return evsel->priv != NULL ? 0 : -ENOMEM;
1105}
1106
1107static void perf_evsel__free_mmap(struct perf_evsel *evsel)
1108{
1109 xyarray__delete(evsel->priv);
1110 evsel->priv = NULL;
1111}
1112
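
xyarray is the replacement for top's fd[MAX_NR_CPUS][MAX_COUNTERS] globals: a flat 2-D array hung off each evsel and indexed by (cpu, thread), the same idiom the FD() macro earlier in this patch applies to evsel->fd. The access pattern isolated, assuming util/xyarray.h as included above; mmap_of is an illustrative name:

/* One struct mmap_data cell per (cpu, thread), as allocated above. */
static struct mmap_data *mmap_of(struct perf_evsel *evsel, int cpu, int thread)
{
        struct xyarray *mmap_array = evsel->priv;

        return xyarray__entry(mmap_array, cpu, thread);
}
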
1091static unsigned int mmap_read_head(struct mmap_data *md) 1113static unsigned int mmap_read_head(struct mmap_data *md)
1092{ 1114{
1093 struct perf_event_mmap_page *pc = md->base; 1115 struct perf_event_mmap_page *pc = md->base;
@@ -1100,11 +1122,15 @@ static unsigned int mmap_read_head(struct mmap_data *md)
1100} 1122}
1101 1123
1102static void perf_session__mmap_read_counter(struct perf_session *self, 1124static void perf_session__mmap_read_counter(struct perf_session *self,
1103 struct mmap_data *md) 1125 struct perf_evsel *evsel,
1126 int cpu, int thread_idx)
1104{ 1127{
1128 struct xyarray *mmap_array = evsel->priv;
1129 struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx);
1105 unsigned int head = mmap_read_head(md); 1130 unsigned int head = mmap_read_head(md);
1106 unsigned int old = md->prev; 1131 unsigned int old = md->prev;
1107 unsigned char *data = md->base + page_size; 1132 unsigned char *data = md->base + page_size;
1133 struct sample_data sample;
1108 int diff; 1134 int diff;
1109 1135
1110 /* 1136 /*
@@ -1152,10 +1178,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
1152 event = &event_copy; 1178 event = &event_copy;
1153 } 1179 }
1154 1180
1181 event__parse_sample(event, self, &sample);
1155 if (event->header.type == PERF_RECORD_SAMPLE) 1182 if (event->header.type == PERF_RECORD_SAMPLE)
1156 event__process_sample(event, self, md->counter); 1183 event__process_sample(event, &sample, self, evsel);
1157 else 1184 else
1158 event__process(event, self); 1185 event__process(event, &sample, self);
1159 old += size; 1186 old += size;
1160 } 1187 }
1161 1188
@@ -1163,36 +1190,39 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
1163} 1190}
1164 1191
1165static struct pollfd *event_array; 1192static struct pollfd *event_array;
1166static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
1167 1193
1168static void perf_session__mmap_read(struct perf_session *self) 1194static void perf_session__mmap_read(struct perf_session *self)
1169{ 1195{
1170 int i, counter, thread_index; 1196 struct perf_evsel *counter;
1197 int i, thread_index;
1171 1198
1172 for (i = 0; i < nr_cpus; i++) { 1199 for (i = 0; i < cpus->nr; i++) {
1173 for (counter = 0; counter < nr_counters; counter++) 1200 list_for_each_entry(counter, &evsel_list, node) {
1174 for (thread_index = 0; 1201 for (thread_index = 0;
1175 thread_index < thread_num; 1202 thread_index < threads->nr;
1176 thread_index++) { 1203 thread_index++) {
1177 perf_session__mmap_read_counter(self, 1204 perf_session__mmap_read_counter(self,
1178 &mmap_array[i][counter][thread_index]); 1205 counter, i, thread_index);
1179 } 1206 }
1207 }
1180 } 1208 }
1181} 1209}
1182 1210
1183int nr_poll; 1211int nr_poll;
1184int group_fd; 1212int group_fd;
1185 1213
1186static void start_counter(int i, int counter) 1214static void start_counter(int i, struct perf_evsel *evsel)
1187{ 1215{
1216 struct xyarray *mmap_array = evsel->priv;
1217 struct mmap_data *mm;
1188 struct perf_event_attr *attr; 1218 struct perf_event_attr *attr;
1189 int cpu = -1; 1219 int cpu = -1;
1190 int thread_index; 1220 int thread_index;
1191 1221
1192 if (target_tid == -1) 1222 if (target_tid == -1)
1193 cpu = cpumap[i]; 1223 cpu = cpus->map[i];
1194 1224
1195 attr = attrs + counter; 1225 attr = &evsel->attr;
1196 1226
1197 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 1227 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
1198 1228
@@ -1205,16 +1235,18 @@ static void start_counter(int i, int counter)
1205 attr->inherit = (cpu < 0) && inherit; 1235 attr->inherit = (cpu < 0) && inherit;
1206 attr->mmap = 1; 1236 attr->mmap = 1;
1207 1237
1208 for (thread_index = 0; thread_index < thread_num; thread_index++) { 1238 for (thread_index = 0; thread_index < threads->nr; thread_index++) {
1209try_again: 1239try_again:
1210 fd[i][counter][thread_index] = sys_perf_event_open(attr, 1240 FD(evsel, i, thread_index) = sys_perf_event_open(attr,
1211 all_tids[thread_index], cpu, group_fd, 0); 1241 threads->map[thread_index], cpu, group_fd, 0);
1212 1242
1213 if (fd[i][counter][thread_index] < 0) { 1243 if (FD(evsel, i, thread_index) < 0) {
1214 int err = errno; 1244 int err = errno;
1215 1245
1216 if (err == EPERM || err == EACCES) 1246 if (err == EPERM || err == EACCES)
1217 die("No permission - are you root?\n"); 1247 die("Permission error - are you root?\n"
1248 "\t Consider tweaking"
1249 " /proc/sys/kernel/perf_event_paranoid.\n");
1218 /* 1250 /*
1219 * If it's cycles then fall back to hrtimer 1251 * If it's cycles then fall back to hrtimer
1220 * based cpu-clock-tick sw counter, which 1252 * based cpu-clock-tick sw counter, which
@@ -1231,30 +1263,30 @@ try_again:
1231 goto try_again; 1263 goto try_again;
1232 } 1264 }
1233 printf("\n"); 1265 printf("\n");
1234 error("perfcounter syscall returned with %d (%s)\n", 1266 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
1235 fd[i][counter][thread_index], strerror(err)); 1267 FD(evsel, i, thread_index), strerror(err));
1236 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1268 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1237 exit(-1); 1269 exit(-1);
1238 } 1270 }
1239 assert(fd[i][counter][thread_index] >= 0); 1271 assert(FD(evsel, i, thread_index) >= 0);
1240 fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK); 1272 fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
1241 1273
1242 /* 1274 /*
1243 * First counter acts as the group leader: 1275 * First counter acts as the group leader:
1244 */ 1276 */
1245 if (group && group_fd == -1) 1277 if (group && group_fd == -1)
1246 group_fd = fd[i][counter][thread_index]; 1278 group_fd = FD(evsel, i, thread_index);
1247 1279
1248 event_array[nr_poll].fd = fd[i][counter][thread_index]; 1280 event_array[nr_poll].fd = FD(evsel, i, thread_index);
1249 event_array[nr_poll].events = POLLIN; 1281 event_array[nr_poll].events = POLLIN;
1250 nr_poll++; 1282 nr_poll++;
1251 1283
1252 mmap_array[i][counter][thread_index].counter = counter; 1284 mm = xyarray__entry(mmap_array, i, thread_index);
1253 mmap_array[i][counter][thread_index].prev = 0; 1285 mm->prev = 0;
1254 mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1; 1286 mm->mask = mmap_pages*page_size - 1;
1255 mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, 1287 mm->base = mmap(NULL, (mmap_pages+1)*page_size,
1256 PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0); 1288 PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
1257 if (mmap_array[i][counter][thread_index].base == MAP_FAILED) 1289 if (mm->base == MAP_FAILED)
1258 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 1290 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1259 } 1291 }
1260} 1292}
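
start_counter() keeps the long-standing try_again fallback whose body this hunk elides: per the comment above, when the kernel refuses the hardware cycles event, top downgrades to the software cpu-clock event (hrtimer driven) and retries. A sketch of the usual shape of that check; fallback_to_cpu_clock is an illustrative name and the exact in-tree condition may differ:

/* Can the failed event be downgraded so the open can be retried? */
static bool fallback_to_cpu_clock(struct perf_event_attr *attr)
{
        if (attr->type == PERF_TYPE_HARDWARE &&
            attr->config == PERF_COUNT_HW_CPU_CYCLES) {
                attr->type = PERF_TYPE_SOFTWARE;        /* hrtimer driven */
                attr->config = PERF_COUNT_SW_CPU_CLOCK;
                return true;    /* caller does goto try_again */
        }
        return false;           /* nothing to fall back to: report and die */
}
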
@@ -1262,13 +1294,13 @@ try_again:
1262static int __cmd_top(void) 1294static int __cmd_top(void)
1263{ 1295{
1264 pthread_t thread; 1296 pthread_t thread;
1265 int i, counter; 1297 struct perf_evsel *counter;
1266 int ret; 1298 int i, ret;
1267 /* 1299 /*
1268 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this 1300 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
1269 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. 1301 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
1270 */ 1302 */
1271 struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false); 1303 struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
1272 if (session == NULL) 1304 if (session == NULL)
1273 return -ENOMEM; 1305 return -ENOMEM;
1274 1306
@@ -1277,9 +1309,9 @@ static int __cmd_top(void)
1277 else 1309 else
1278 event__synthesize_threads(event__process, session); 1310 event__synthesize_threads(event__process, session);
1279 1311
1280 for (i = 0; i < nr_cpus; i++) { 1312 for (i = 0; i < cpus->nr; i++) {
1281 group_fd = -1; 1313 group_fd = -1;
1282 for (counter = 0; counter < nr_counters; counter++) 1314 list_for_each_entry(counter, &evsel_list, node)
1283 start_counter(i, counter); 1315 start_counter(i, counter);
1284 } 1316 }
1285 1317
@@ -1368,8 +1400,8 @@ static const struct option options[] = {
1368 1400
1369int cmd_top(int argc, const char **argv, const char *prefix __used) 1401int cmd_top(int argc, const char **argv, const char *prefix __used)
1370{ 1402{
1371 int counter; 1403 struct perf_evsel *pos;
1372 int i,j; 1404 int status = -ENOMEM;
1373 1405
1374 page_size = sysconf(_SC_PAGE_SIZE); 1406 page_size = sysconf(_SC_PAGE_SIZE);
1375 1407
@@ -1377,34 +1409,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1377 if (argc) 1409 if (argc)
1378 usage_with_options(top_usage, options); 1410 usage_with_options(top_usage, options);
1379 1411
1380 if (target_pid != -1) { 1412 if (target_pid != -1)
1381 target_tid = target_pid; 1413 target_tid = target_pid;
1382 thread_num = find_all_tid(target_pid, &all_tids);
1383 if (thread_num <= 0) {
1384 fprintf(stderr, "Can't find all threads of pid %d\n",
1385 target_pid);
1386 usage_with_options(top_usage, options);
1387 }
1388 } else {
1389 all_tids=malloc(sizeof(pid_t));
1390 if (!all_tids)
1391 return -ENOMEM;
1392 1414
1393 all_tids[0] = target_tid; 1415 threads = thread_map__new(target_pid, target_tid);
1394 thread_num = 1; 1416 if (threads == NULL) {
1417 pr_err("Problems finding threads of monitor\n");
1418 usage_with_options(top_usage, options);
1395 } 1419 }
1396 1420
1397 for (i = 0; i < MAX_NR_CPUS; i++) { 1421 event_array = malloc((sizeof(struct pollfd) *
1398 for (j = 0; j < MAX_COUNTERS; j++) { 1422 MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
1399 fd[i][j] = malloc(sizeof(int)*thread_num);
1400 mmap_array[i][j] = zalloc(
1401 sizeof(struct mmap_data)*thread_num);
1402 if (!fd[i][j] || !mmap_array[i][j])
1403 return -ENOMEM;
1404 }
1405 }
1406 event_array = malloc(
1407 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
1408 if (!event_array) 1423 if (!event_array)
1409 return -ENOMEM; 1424 return -ENOMEM;
1410 1425
@@ -1415,15 +1430,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1415 cpu_list = NULL; 1430 cpu_list = NULL;
1416 } 1431 }
1417 1432
1418 if (!nr_counters) 1433 if (!nr_counters && perf_evsel_list__create_default() < 0) {
1419 nr_counters = 1; 1434 pr_err("Not enough memory for event selector list\n");
1420 1435 return -ENOMEM;
1421 symbol_conf.priv_size = (sizeof(struct sym_entry) + 1436 }
1422 (nr_counters + 1) * sizeof(unsigned long));
1423
1424 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
1425 if (symbol__init() < 0)
1426 return -1;
1427 1437
1428 if (delay_secs < 1) 1438 if (delay_secs < 1)
1429 delay_secs = 1; 1439 delay_secs = 1;
@@ -1440,23 +1450,33 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1440 exit(EXIT_FAILURE); 1450 exit(EXIT_FAILURE);
1441 } 1451 }
1442 1452
1443 /* 1453 if (target_tid != -1)
1444 * Fill in the ones not specifically initialized via -c: 1454 cpus = cpu_map__dummy_new();
1445 */ 1455 else
1446 for (counter = 0; counter < nr_counters; counter++) { 1456 cpus = cpu_map__new(cpu_list);
1447 if (attrs[counter].sample_period) 1457
1458 if (cpus == NULL)
1459 usage_with_options(top_usage, options);
1460
1461 list_for_each_entry(pos, &evsel_list, node) {
1462 if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
1463 perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
1464 goto out_free_fd;
1465 /*
1466 * Fill in the ones not specifically initialized via -c:
1467 */
1468 if (pos->attr.sample_period)
1448 continue; 1469 continue;
1449 1470
1450 attrs[counter].sample_period = default_interval; 1471 pos->attr.sample_period = default_interval;
1451 } 1472 }
1452 1473
1453 if (target_tid != -1) 1474 symbol_conf.priv_size = (sizeof(struct sym_entry) +
1454 nr_cpus = 1; 1475 (nr_counters + 1) * sizeof(unsigned long));
1455 else
1456 nr_cpus = read_cpu_map(cpu_list);
1457 1476
1458 if (nr_cpus < 1) 1477 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
1459 usage_with_options(top_usage, options); 1478 if (symbol__init() < 0)
1479 return -1;
1460 1480
1461 get_term_dimensions(&winsize); 1481 get_term_dimensions(&winsize);
1462 if (print_entries == 0) { 1482 if (print_entries == 0) {
@@ -1464,5 +1484,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1464 signal(SIGWINCH, sig_winch_handler); 1484 signal(SIGWINCH, sig_winch_handler);
1465 } 1485 }
1466 1486
1467 return __cmd_top(); 1487 status = __cmd_top();
1488out_free_fd:
1489 list_for_each_entry(pos, &evsel_list, node)
1490 perf_evsel__free_mmap(pos);
1491
1492 return status;
1468} 1493}
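
The builtin-top.c conversion above swaps the open-coded fd[cpu][counter][thread] arrays for per-evsel FD(evsel, cpu, thread) lookups. A minimal sketch of the xyarray accessor this is built on, assuming the tools/perf/util/xyarray.h layout of this series (the struct, helpers and FD() below are illustrative, not part of the hunk):

#include <stdlib.h>

/* One entry per (cpu, thread) pair, stored row-major: rows are cpus. */
struct xyarray {
	size_t row_size;	/* bytes per cpu row */
	size_t entry_size;	/* bytes per (cpu, thread) entry */
	char contents[];
};

static struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
{
	struct xyarray *xy = calloc(1, sizeof(*xy) + xlen * ylen * entry_size);

	if (xy != NULL) {
		xy->entry_size = entry_size;
		xy->row_size = ylen * entry_size;
	}
	return xy;
}

static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
{
	return &xy->contents[x * xy->row_size + y * xy->entry_size];
}

/* builtin-top.c can then read a per-(cpu, thread) file descriptor as: */
struct perf_evsel_sketch { struct xyarray *fd; };
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, (x), (y)))

The same pattern backs the mmap_array lookups: allocating one xyarray of struct mmap_data entries replaces the per-counter malloc loop that cmd_top() used to carry.
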
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 921245b28583..c7798c7f24ed 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -27,7 +27,7 @@ extern int cmd_report(int argc, const char **argv, const char *prefix);
27extern int cmd_stat(int argc, const char **argv, const char *prefix); 27extern int cmd_stat(int argc, const char **argv, const char *prefix);
28extern int cmd_timechart(int argc, const char **argv, const char *prefix); 28extern int cmd_timechart(int argc, const char **argv, const char *prefix);
29extern int cmd_top(int argc, const char **argv, const char *prefix); 29extern int cmd_top(int argc, const char **argv, const char *prefix);
30extern int cmd_trace(int argc, const char **argv, const char *prefix); 30extern int cmd_script(int argc, const char **argv, const char *prefix);
31extern int cmd_version(int argc, const char **argv, const char *prefix); 31extern int cmd_version(int argc, const char **argv, const char *prefix);
32extern int cmd_probe(int argc, const char **argv, const char *prefix); 32extern int cmd_probe(int argc, const char **argv, const char *prefix);
33extern int cmd_kmem(int argc, const char **argv, const char *prefix); 33extern int cmd_kmem(int argc, const char **argv, const char *prefix);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 949d77fc0b97..16b5088cf8f4 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -16,7 +16,7 @@ perf-report mainporcelain common
16perf-stat mainporcelain common 16perf-stat mainporcelain common
17perf-timechart mainporcelain common 17perf-timechart mainporcelain common
18perf-top mainporcelain common 18perf-top mainporcelain common
19perf-trace mainporcelain common 19perf-script mainporcelain common
20perf-probe mainporcelain common 20perf-probe mainporcelain common
21perf-kmem mainporcelain common 21perf-kmem mainporcelain common
22perf-lock mainporcelain common 22perf-lock mainporcelain common
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index b253db634f04..b041ca67a2cb 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -9,8 +9,8 @@ endef
9ifndef NO_DWARF 9ifndef NO_DWARF
10define SOURCE_DWARF 10define SOURCE_DWARF
11#include <dwarf.h> 11#include <dwarf.h>
12#include <libdw.h> 12#include <elfutils/libdw.h>
13#include <version.h> 13#include <elfutils/version.h>
14#ifndef _ELFUTILS_PREREQ 14#ifndef _ELFUTILS_PREREQ
15#error 15#error
16#endif 16#endif
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index cdd6c03f1e14..5b1ecd66bb36 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -286,6 +286,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
286 status = p->fn(argc, argv, prefix); 286 status = p->fn(argc, argv, prefix);
287 exit_browser(status); 287 exit_browser(status);
288 288
289 perf_evsel_list__delete();
290
289 if (status) 291 if (status)
290 return status & 0xff; 292 return status & 0xff;
291 293
@@ -323,7 +325,7 @@ static void handle_internal_command(int argc, const char **argv)
323 { "top", cmd_top, 0 }, 325 { "top", cmd_top, 0 },
324 { "annotate", cmd_annotate, 0 }, 326 { "annotate", cmd_annotate, 0 },
325 { "version", cmd_version, 0 }, 327 { "version", cmd_version, 0 },
326 { "trace", cmd_trace, 0 }, 328 { "script", cmd_script, 0 },
327 { "sched", cmd_sched, 0 }, 329 { "sched", cmd_sched, 0 },
328 { "probe", cmd_probe, 0 }, 330 { "probe", cmd_probe, 0 },
329 { "kmem", cmd_kmem, 0 }, 331 { "kmem", cmd_kmem, 0 },
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
index 01a64ad693f2..790ceba6ad3f 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
@@ -8,7 +8,7 @@
8 8
9#line 1 "Context.xs" 9#line 1 "Context.xs"
10/* 10/*
11 * Context.xs. XS interfaces for perf trace. 11 * Context.xs. XS interfaces for perf script.
12 * 12 *
13 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 13 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
14 * 14 *
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
index 549cf0467d30..c1e2ed1ed34e 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
@@ -1,5 +1,5 @@
1/* 1/*
2 * Context.xs. XS interfaces for perf trace. 2 * Context.xs. XS interfaces for perf script.
3 * 3 *
4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
5 * 5 *
@@ -23,7 +23,7 @@
23#include "perl.h" 23#include "perl.h"
24#include "XSUB.h" 24#include "XSUB.h"
25#include "../../../perf.h" 25#include "../../../perf.h"
26#include "../../../util/trace-event.h" 26#include "../../../util/script-event.h"
27 27
28MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context 28MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context
29PROTOTYPES: ENABLE 29PROTOTYPES: ENABLE
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README
index 9a9707630791..2f0c7f3043ee 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/README
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/README
@@ -1,7 +1,7 @@
1Perf-Trace-Util version 0.01 1Perf-Trace-Util version 0.01
2============================ 2============================
3 3
4This module contains utility functions for use with perf trace. 4This module contains utility functions for use with perf script.
5 5
6Core.pm and Util.pm are pure Perl modules; Core.pm contains routines 6Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
7that the core perf support for Perl calls on and should always be 7that the core perf support for Perl calls on and should always be
@@ -33,7 +33,7 @@ After you do that:
33 33
34INSTALLATION 34INSTALLATION
35 35
36Building perf with perf trace Perl scripting should install this 36Building perf with perf script Perl scripting should install this
37module in the right place. 37module in the right place.
38 38
39You should make sure libperl and ExtUtils/Embed.pm are installed first 39You should make sure libperl and ExtUtils/Embed.pm are installed first
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
index 6c7f3659cb17..4e2f6039ac92 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
@@ -34,7 +34,7 @@ Perf::Trace::Context - Perl extension for accessing functions in perf.
34 34
35=head1 SEE ALSO 35=head1 SEE ALSO
36 36
37Perf (trace) documentation 37Perf (script) documentation
38 38
39=head1 AUTHOR 39=head1 AUTHOR
40 40
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
index 9df376a9f629..9158458d3eeb 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
@@ -163,7 +163,7 @@ sub dump_symbolic_fields
163__END__ 163__END__
164=head1 NAME 164=head1 NAME
165 165
166Perf::Trace::Core - Perl extension for perf trace 166Perf::Trace::Core - Perl extension for perf script
167 167
168=head1 SYNOPSIS 168=head1 SYNOPSIS
169 169
@@ -171,7 +171,7 @@ Perf::Trace::Core - Perl extension for perf trace
171 171
172=head1 SEE ALSO 172=head1 SEE ALSO
173 173
174Perf (trace) documentation 174Perf (script) documentation
175 175
176=head1 AUTHOR 176=head1 AUTHOR
177 177
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
index d94b40c8ac85..053500114625 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -65,7 +65,7 @@ sub clear_term
65__END__ 65__END__
66=head1 NAME 66=head1 NAME
67 67
68Perf::Trace::Util - Perl extension for perf trace 68Perf::Trace::Util - Perl extension for perf script
69 69
70=head1 SYNOPSIS 70=head1 SYNOPSIS
71 71
@@ -73,7 +73,7 @@ Perf::Trace::Util - Perl extension for perf trace
73 73
74=head1 SEE ALSO 74=head1 SEE ALSO
75 75
76Perf (trace) documentation 76Perf (script) documentation
77 77
78=head1 AUTHOR 78=head1 AUTHOR
79 79
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
index 4028d92dc4ae..9f83cc1ad8ba 100644
--- a/tools/perf/scripts/perl/bin/failed-syscalls-report
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
index ba25f4d41fb0..77200b3f3100 100644
--- a/tools/perf/scripts/perl/bin/rw-by-file-report
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -7,7 +7,4 @@ if [ $# -lt 1 ] ; then
7fi 7fi
8comm=$1 8comm=$1
9shift 9shift
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
11
12
13
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
index 641a3f5d085c..a27b9f311f95 100644
--- a/tools/perf/scripts/perl/bin/rw-by-pid-report
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -1,6 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide r/w activity 2# description: system-wide r/w activity
3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl 3perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
4
5
6
diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report
index 4918dba77021..83e11ec2e190 100644
--- a/tools/perf/scripts/perl/bin/rwtop-report
+++ b/tools/perf/scripts/perl/bin/rwtop-report
@@ -17,7 +17,4 @@ if [ "$n_args" -gt 0 ] ; then
17 interval=$1 17 interval=$1
18 shift 18 shift
19fi 19fi
20perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval 20perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
21
22
23
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
index 49052ebcb632..889e8130cca5 100644
--- a/tools/perf/scripts/perl/bin/wakeup-latency-report
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -1,6 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide min/max/avg wakeup latency 2# description: system-wide min/max/avg wakeup latency
3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl 3perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
4
5
6
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
index df0c65f4ca93..6d91411d248c 100644
--- a/tools/perf/scripts/perl/bin/workqueue-stats-report
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -1,7 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: workqueue stats (ins/exe/create/destroy) 2# description: workqueue stats (ins/exe/create/destroy)
3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl 3perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
4
5
6
7
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
index 4e7dc0a407a5..4e7076c20616 100644
--- a/tools/perf/scripts/perl/check-perf-trace.pl
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -1,4 +1,4 @@
1# perf trace event handlers, generated by perf trace -g perl 1# perf script event handlers, generated by perf script -g perl
2# (c) 2009, Tom Zanussi <tzanussi@gmail.com> 2# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2 3# Licensed under the terms of the GNU GPL License version 2
4 4
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
index 2a39097687b9..74844ee2be3e 100644
--- a/tools/perf/scripts/perl/rw-by-file.pl
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -18,7 +18,7 @@ use lib "./Perf-Trace-Util/lib";
18use Perf::Trace::Core; 18use Perf::Trace::Core;
19use Perf::Trace::Util; 19use Perf::Trace::Util;
20 20
21my $usage = "perf trace -s rw-by-file.pl <comm>\n"; 21my $usage = "perf script -s rw-by-file.pl <comm>\n";
22 22
23my $for_comm = shift or die $usage; 23my $for_comm = shift or die $usage;
24 24
diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl
index b84b12699b70..a8eaff5119e0 100644
--- a/tools/perf/scripts/perl/workqueue-stats.pl
+++ b/tools/perf/scripts/perl/workqueue-stats.pl
@@ -10,7 +10,7 @@
10# workqueue:workqueue_destruction -e workqueue:workqueue_execution 10# workqueue:workqueue_destruction -e workqueue:workqueue_execution
11# -e workqueue:workqueue_insertion 11# -e workqueue:workqueue_insertion
12# 12#
13# perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl 13# perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
14 14
15use 5.010000; 15use 5.010000;
16use strict; 16use strict;
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 957085dd5d8d..315067b8f552 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Context.c. Python interfaces for perf trace. 2 * Context.c. Python interfaces for perf script.
3 * 3 *
4 * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com> 4 * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
5 * 5 *
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
index aad7525bca1d..de7211e4fa47 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -1,4 +1,4 @@
1# Core.py - Python extension for perf trace, core functions 1# Core.py - Python extension for perf script, core functions
2# 2#
3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com> 3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
4# 4#
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
index ae9a56e43e05..fdd92f699055 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -1,4 +1,4 @@
1# SchedGui.py - Python extension for perf trace, basic GUI code for 1# SchedGui.py - Python extension for perf script, basic GUI code for
2# traces drawing and overview. 2# traces drawing and overview.
3# 3#
4# Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com> 4# Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 13cc02b5893a..15c8400240fd 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -1,4 +1,4 @@
1# Util.py - Python extension for perf trace, miscellaneous utility code 1# Util.py - Python extension for perf script, miscellaneous utility code
2# 2#
3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com> 3# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
4# 4#
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
index 03587021463d..fda5096d0cbf 100644
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report
index c8268138fb7e..6c44271091ab 100644
--- a/tools/perf/scripts/python/bin/futex-contention-report
+++ b/tools/perf/scripts/python/bin/futex-contention-report
@@ -1,4 +1,4 @@
1#!/bin/bash 1#!/bin/bash
2# description: futex contention measurement 2# description: futex contention measurement
3 3
4perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py 4perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
index 4ad361b31249..8f759291da86 100644
--- a/tools/perf/scripts/python/bin/netdev-times-report
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -2,4 +2,4 @@
2# description: display a process of packet and processing time 2# description: display a process of packet and processing time
3# args: [tx] [rx] [dev=] [debug] 3# args: [tx] [rx] [dev=] [debug]
4 4
5perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@ 5perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report
index df1791f07c24..68b037a1849b 100644
--- a/tools/perf/scripts/python/bin/sched-migration-report
+++ b/tools/perf/scripts/python/bin/sched-migration-report
@@ -1,3 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: sched migration overview 2# description: sched migration overview
3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py 3perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report
index 36b409c05e50..c32db294124d 100644
--- a/tools/perf/scripts/python/bin/sctop-report
+++ b/tools/perf/scripts/python/bin/sctop-report
@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
21 interval=$1 21 interval=$1
22 shift 22 shift
23fi 23fi
24perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval 24perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
index 4eb88c9fc83c..16eb8d65c543 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
index cb2f9c5cf17e..0f0e9d453bb4 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm 10perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
diff --git a/tools/perf/scripts/python/check-perf-trace.py b/tools/perf/scripts/python/check-perf-trace.py
index d9f7893e315c..4647a7694cf6 100644
--- a/tools/perf/scripts/python/check-perf-trace.py
+++ b/tools/perf/scripts/python/check-perf-trace.py
@@ -1,4 +1,4 @@
1# perf trace event handlers, generated by perf trace -g python 1# perf script event handlers, generated by perf script -g python
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com> 2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2 3# Licensed under the terms of the GNU GPL License version 2
4# 4#
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
index acd7848717b3..85805fac4116 100644
--- a/tools/perf/scripts/python/failed-syscalls-by-pid.py
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -15,7 +15,7 @@ from perf_trace_context import *
15from Core import * 15from Core import *
16from Util import * 16from Util import *
17 17
18usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n"; 18usage = "perf script -s syscall-counts-by-pid.py [comm|pid]\n";
19 19
20for_comm = None 20for_comm = None
21for_pid = None 21for_pid = None
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
index b934383c3364..74d55ec08aed 100644
--- a/tools/perf/scripts/python/sched-migration.py
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -4,7 +4,7 @@
4# 4#
5# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com> 5# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
6# 6#
7# perf trace event handlers have been generated by perf trace -g python 7# perf script event handlers have been generated by perf script -g python
8# 8#
9# This software is distributed under the terms of the GNU General 9# This software is distributed under the terms of the GNU General
10# Public License ("GPL") version 2 as published by the Free Software 10# Public License ("GPL") version 2 as published by the Free Software
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py
index 7a6ec2c7d8ab..42c267e292fa 100644
--- a/tools/perf/scripts/python/sctop.py
+++ b/tools/perf/scripts/python/sctop.py
@@ -17,7 +17,7 @@ from perf_trace_context import *
17from Core import * 17from Core import *
18from Util import * 18from Util import *
19 19
20usage = "perf trace -s sctop.py [comm] [interval]\n"; 20usage = "perf script -s sctop.py [comm] [interval]\n";
21 21
22for_comm = None 22for_comm = None
23default_interval = 3 23default_interval = 3
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
index d1ee3ec10cf2..c64d1c55d745 100644
--- a/tools/perf/scripts/python/syscall-counts-by-pid.py
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -14,7 +14,7 @@ from perf_trace_context import *
14from Core import * 14from Core import *
15from Util import syscall_name 15from Util import syscall_name
16 16
17usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; 17usage = "perf script -s syscall-counts-by-pid.py [comm]\n";
18 18
19for_comm = None 19for_comm = None
20for_pid = None 20for_pid = None
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
index ea183dc82d29..b435d3f188e8 100644
--- a/tools/perf/scripts/python/syscall-counts.py
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -15,7 +15,7 @@ from perf_trace_context import *
15from Core import * 15from Core import *
16from Util import syscall_name 16from Util import syscall_name
17 17
18usage = "perf trace -s syscall-counts.py [comm]\n"; 18usage = "perf script -s syscall-counts.py [comm]\n";
19 19
20for_comm = None 20for_comm = None
21 21
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e437edb72417..deffb8c96071 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,7 +14,9 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include "debug.h" 15#include "debug.h"
16 16
17static int build_id__mark_dso_hit(event_t *event, struct perf_session *session) 17static int build_id__mark_dso_hit(event_t *event,
18 struct sample_data *sample __used,
19 struct perf_session *session)
18{ 20{
19 struct addr_location al; 21 struct addr_location al;
20 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 22 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
35 return 0; 37 return 0;
36} 38}
37 39
38static int event__exit_del_thread(event_t *self, struct perf_session *session) 40static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
41 struct perf_session *session)
39{ 42{
40 struct thread *thread = perf_session__findnew(session, self->fork.tid); 43 struct thread *thread = perf_session__findnew(session, self->fork.tid);
41 44
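
Both build-id.c handlers now take the sample alongside the event: every event__handler_t in the tree gains a struct sample_data * so pre-parsed sample fields travel with the record. A conforming handler, assuming the widened typedef these hunks imply (the counter below is a toy, not from the patch):

#include "util/event.h"	/* event_t, struct sample_data, __used */

/* Matches the implied typedef:
 * int (*event__handler_t)(event_t *, struct sample_data *,
 *			   struct perf_session *); */
static int count_comm_events(event_t *event,
			     struct sample_data *sample __used,
			     struct perf_session *session __used)
{
	static unsigned long nr_comm;

	if (event->header.type == PERF_RECORD_COMM)
		nr_comm++;

	return 0;	/* returning 0 signals success to the session layer */
}
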
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0f9b8d7a7d7e..3ccaa1043383 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -4,32 +4,53 @@
4#include <assert.h> 4#include <assert.h>
5#include <stdio.h> 5#include <stdio.h>
6 6
7int cpumap[MAX_NR_CPUS]; 7static struct cpu_map *cpu_map__default_new(void)
8
9static int default_cpu_map(void)
10{ 8{
11 int nr_cpus, i; 9 struct cpu_map *cpus;
10 int nr_cpus;
12 11
13 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 12 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
14 assert(nr_cpus <= MAX_NR_CPUS); 13 if (nr_cpus < 0)
15 assert((int)nr_cpus >= 0); 14 return NULL;
15
16 cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
17 if (cpus != NULL) {
18 int i;
19 for (i = 0; i < nr_cpus; ++i)
20 cpus->map[i] = i;
16 21
17 for (i = 0; i < nr_cpus; ++i) 22 cpus->nr = nr_cpus;
18 cpumap[i] = i; 23 }
19 24
20 return nr_cpus; 25 return cpus;
21} 26}
22 27
23static int read_all_cpu_map(void) 28static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
24{ 29{
30 size_t payload_size = nr_cpus * sizeof(int);
31 struct cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
32
33 if (cpus != NULL) {
34 cpus->nr = nr_cpus;
35 memcpy(cpus->map, tmp_cpus, payload_size);
36 }
37
38 return cpus;
39}
40
41static struct cpu_map *cpu_map__read_all_cpu_map(void)
42{
43 struct cpu_map *cpus = NULL;
25 FILE *onlnf; 44 FILE *onlnf;
26 int nr_cpus = 0; 45 int nr_cpus = 0;
46 int *tmp_cpus = NULL, *tmp;
47 int max_entries = 0;
27 int n, cpu, prev; 48 int n, cpu, prev;
28 char sep; 49 char sep;
29 50
30 onlnf = fopen("/sys/devices/system/cpu/online", "r"); 51 onlnf = fopen("/sys/devices/system/cpu/online", "r");
31 if (!onlnf) 52 if (!onlnf)
32 return default_cpu_map(); 53 return cpu_map__default_new();
33 54
34 sep = 0; 55 sep = 0;
35 prev = -1; 56 prev = -1;
@@ -38,12 +59,28 @@ static int read_all_cpu_map(void)
38 if (n <= 0) 59 if (n <= 0)
39 break; 60 break;
40 if (prev >= 0) { 61 if (prev >= 0) {
41 assert(nr_cpus + cpu - prev - 1 < MAX_NR_CPUS); 62 int new_max = nr_cpus + cpu - prev - 1;
63
64 if (new_max >= max_entries) {
65 max_entries = new_max + MAX_NR_CPUS / 2;
66 tmp = realloc(tmp_cpus, max_entries * sizeof(int));
67 if (tmp == NULL)
68 goto out_free_tmp;
69 tmp_cpus = tmp;
70 }
71
42 while (++prev < cpu) 72 while (++prev < cpu)
43 cpumap[nr_cpus++] = prev; 73 tmp_cpus[nr_cpus++] = prev;
74 }
75 if (nr_cpus == max_entries) {
76 max_entries += MAX_NR_CPUS;
77 tmp = realloc(tmp_cpus, max_entries * sizeof(int));
78 if (tmp == NULL)
79 goto out_free_tmp;
80 tmp_cpus = tmp;
44 } 81 }
45 assert (nr_cpus < MAX_NR_CPUS); 82
46 cpumap[nr_cpus++] = cpu; 83 tmp_cpus[nr_cpus++] = cpu;
47 if (n == 2 && sep == '-') 84 if (n == 2 && sep == '-')
48 prev = cpu; 85 prev = cpu;
49 else 86 else
@@ -51,24 +88,31 @@ static int read_all_cpu_map(void)
51 if (n == 1 || sep == '\n') 88 if (n == 1 || sep == '\n')
52 break; 89 break;
53 } 90 }
54 fclose(onlnf);
55 if (nr_cpus > 0)
56 return nr_cpus;
57 91
58 return default_cpu_map(); 92 if (nr_cpus > 0)
93 cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
94 else
95 cpus = cpu_map__default_new();
96out_free_tmp:
97 free(tmp_cpus);
98 fclose(onlnf);
99 return cpus;
59} 100}
60 101
61int read_cpu_map(const char *cpu_list) 102struct cpu_map *cpu_map__new(const char *cpu_list)
62{ 103{
104 struct cpu_map *cpus = NULL;
63 unsigned long start_cpu, end_cpu = 0; 105 unsigned long start_cpu, end_cpu = 0;
64 char *p = NULL; 106 char *p = NULL;
65 int i, nr_cpus = 0; 107 int i, nr_cpus = 0;
108 int *tmp_cpus = NULL, *tmp;
109 int max_entries = 0;
66 110
67 if (!cpu_list) 111 if (!cpu_list)
68 return read_all_cpu_map(); 112 return cpu_map__read_all_cpu_map();
69 113
70 if (!isdigit(*cpu_list)) 114 if (!isdigit(*cpu_list))
71 goto invalid; 115 goto out;
72 116
73 while (isdigit(*cpu_list)) { 117 while (isdigit(*cpu_list)) {
74 p = NULL; 118 p = NULL;
@@ -94,21 +138,42 @@ int read_cpu_map(const char *cpu_list)
94 for (; start_cpu <= end_cpu; start_cpu++) { 138 for (; start_cpu <= end_cpu; start_cpu++) {
95 /* check for duplicates */ 139 /* check for duplicates */
96 for (i = 0; i < nr_cpus; i++) 140 for (i = 0; i < nr_cpus; i++)
97 if (cpumap[i] == (int)start_cpu) 141 if (tmp_cpus[i] == (int)start_cpu)
98 goto invalid; 142 goto invalid;
99 143
100 assert(nr_cpus < MAX_NR_CPUS); 144 if (nr_cpus == max_entries) {
101 cpumap[nr_cpus++] = (int)start_cpu; 145 max_entries += MAX_NR_CPUS;
146 tmp = realloc(tmp_cpus, max_entries * sizeof(int));
147 if (tmp == NULL)
148 goto invalid;
149 tmp_cpus = tmp;
150 }
151 tmp_cpus[nr_cpus++] = (int)start_cpu;
102 } 152 }
103 if (*p) 153 if (*p)
104 ++p; 154 ++p;
105 155
106 cpu_list = p; 156 cpu_list = p;
107 } 157 }
108 if (nr_cpus > 0)
109 return nr_cpus;
110 158
111 return default_cpu_map(); 159 if (nr_cpus > 0)
160 cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
161 else
162 cpus = cpu_map__default_new();
112invalid: 163invalid:
113 return -1; 164 free(tmp_cpus);
165out:
166 return cpus;
167}
168
169struct cpu_map *cpu_map__dummy_new(void)
170{
171 struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
172
173 if (cpus != NULL) {
174 cpus->nr = 1;
175 cpus->map[0] = -1;
176 }
177
178 return cpus;
114} 179}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 3e60f56e490e..f7a4f42f6307 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -1,7 +1,13 @@
1#ifndef __PERF_CPUMAP_H 1#ifndef __PERF_CPUMAP_H
2#define __PERF_CPUMAP_H 2#define __PERF_CPUMAP_H
3 3
4extern int read_cpu_map(const char *cpu_list); 4struct cpu_map {
5extern int cpumap[]; 5 int nr;
6 int map[];
7};
8
9struct cpu_map *cpu_map__new(const char *cpu_list);
10struct cpu_map *cpu_map__dummy_new(void);
11void cpu_map__delete(struct cpu_map *map);
6 12
7#endif /* __PERF_CPUMAP_H */ 13#endif /* __PERF_CPUMAP_H */
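
struct cpu_map keeps its cpu list in a C99 flexible array member, so a single malloc(sizeof(*cpus) + nr_cpus * sizeof(int)) covers header and payload, as cpu_map__default_new() above shows. A usage sketch of the new API (cpu_map__delete() is assumed to simply free the map):

#include <stdio.h>
#include "util/cpumap.h"

static int print_cpus(const char *cpu_list)
{
	/* a NULL cpu_list falls back to all online cpus */
	struct cpu_map *cpus = cpu_map__new(cpu_list);
	int i;

	if (cpus == NULL)
		return -1;

	for (i = 0; i < cpus->nr; i++)
		printf("monitoring cpu %d\n", cpus->map[i]);

	cpu_map__delete(cpus);
	return 0;
}

cpu_map__dummy_new() produces the single-entry map[0] == -1 variant used for per-thread monitoring, where sys_perf_event_open() is called with cpu == -1.
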
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index c8d81b00089d..01bbe8ecec3f 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...)
46 return ret; 46 return ret;
47} 47}
48 48
49static int dump_printf_color(const char *fmt, const char *color, ...) 49#ifdef NO_NEWT_SUPPORT
50void ui__warning(const char *format, ...)
50{ 51{
51 va_list args; 52 va_list args;
52 int ret = 0;
53 53
54 if (dump_trace) { 54 va_start(args, format);
55 va_start(args, color); 55 vfprintf(stderr, format, args);
56 ret = color_vfprintf(stdout, color, fmt, args); 56 va_end(args);
57 va_end(args);
58 }
59
60 return ret;
61} 57}
62 58#endif
63 59
64void trace_event(event_t *event) 60void trace_event(event_t *event)
65{ 61{
@@ -70,29 +66,29 @@ void trace_event(event_t *event)
70 if (!dump_trace) 66 if (!dump_trace)
71 return; 67 return;
72 68
73 dump_printf("."); 69 printf(".");
74 dump_printf_color("\n. ... raw event: size %d bytes\n", color, 70 color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
75 event->header.size); 71 event->header.size);
76 72
77 for (i = 0; i < event->header.size; i++) { 73 for (i = 0; i < event->header.size; i++) {
78 if ((i & 15) == 0) { 74 if ((i & 15) == 0) {
79 dump_printf("."); 75 printf(".");
80 dump_printf_color(" %04x: ", color, i); 76 color_fprintf(stdout, color, " %04x: ", i);
81 } 77 }
82 78
83 dump_printf_color(" %02x", color, raw_event[i]); 79 color_fprintf(stdout, color, " %02x", raw_event[i]);
84 80
85 if (((i & 15) == 15) || i == event->header.size-1) { 81 if (((i & 15) == 15) || i == event->header.size-1) {
86 dump_printf_color(" ", color); 82 color_fprintf(stdout, color, " ");
87 for (j = 0; j < 15-(i & 15); j++) 83 for (j = 0; j < 15-(i & 15); j++)
88 dump_printf_color(" ", color); 84 color_fprintf(stdout, color, " ");
89 for (j = i & ~15; j <= i; j++) { 85 for (j = i & ~15; j <= i; j++) {
90 dump_printf_color("%c", color, 86 color_fprintf(stdout, color, "%c",
91 isprint(raw_event[j]) ? 87 isprint(raw_event[j]) ?
92 raw_event[j] : '.'); 88 raw_event[j] : '.');
93 } 89 }
94 dump_printf_color("\n", color); 90 color_fprintf(stdout, color, "\n");
95 } 91 }
96 } 92 }
97 dump_printf(".\n"); 93 printf(".\n");
98} 94}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 7b514082bbaf..ca35fd66b5df 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap);
35#include "ui/progress.h" 35#include "ui/progress.h"
36#endif 36#endif
37 37
38void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
39
38#endif /* __PERF_DEBUG_H */ 40#endif /* __PERF_DEBUG_H */
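
The format(printf, 1, 2) attribute on the new ui__warning() declaration makes gcc check the variadic arguments against the format string at every call site: argument 1 is the format, checking starts at argument 2. A hypothetical caller:

#include "util/debug.h"

static void warn_lost(unsigned long lost)
{
	ui__warning("lost %lu events\n", lost);	/* %lu matches unsigned long */
	/* ui__warning("lost %d events\n", lost) would trigger -Wformat */
}
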
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dab9e754a281..2302ec051bb4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -7,7 +7,7 @@
7#include "strlist.h" 7#include "strlist.h"
8#include "thread.h" 8#include "thread.h"
9 9
10const char *event__name[] = { 10static const char *event__name[] = {
11 [0] = "TOTAL", 11 [0] = "TOTAL",
12 [PERF_RECORD_MMAP] = "MMAP", 12 [PERF_RECORD_MMAP] = "MMAP",
13 [PERF_RECORD_LOST] = "LOST", 13 [PERF_RECORD_LOST] = "LOST",
@@ -22,13 +22,31 @@ const char *event__name[] = {
22 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 22 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
23 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", 23 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
24 [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", 24 [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
25 [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
25}; 26};
26 27
27static pid_t event__synthesize_comm(pid_t pid, int full, 28const char *event__get_event_name(unsigned int id)
29{
30 if (id >= ARRAY_SIZE(event__name))
31 return "INVALID";
32 if (!event__name[id])
33 return "UNKNOWN";
34 return event__name[id];
35}
36
37static struct sample_data synth_sample = {
38 .pid = -1,
39 .tid = -1,
40 .time = -1,
41 .stream_id = -1,
42 .cpu = -1,
43 .period = 1,
44};
45
46static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
28 event__handler_t process, 47 event__handler_t process,
29 struct perf_session *session) 48 struct perf_session *session)
30{ 49{
31 event_t ev;
32 char filename[PATH_MAX]; 50 char filename[PATH_MAX];
33 char bf[BUFSIZ]; 51 char bf[BUFSIZ];
34 FILE *fp; 52 FILE *fp;
@@ -49,34 +67,39 @@ out_race:
49 return 0; 67 return 0;
50 } 68 }
51 69
52 memset(&ev.comm, 0, sizeof(ev.comm)); 70 memset(&event->comm, 0, sizeof(event->comm));
53 while (!ev.comm.comm[0] || !ev.comm.pid) { 71
54 if (fgets(bf, sizeof(bf), fp) == NULL) 72 while (!event->comm.comm[0] || !event->comm.pid) {
55 goto out_failure; 73 if (fgets(bf, sizeof(bf), fp) == NULL) {
74 pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
75 goto out;
76 }
56 77
57 if (memcmp(bf, "Name:", 5) == 0) { 78 if (memcmp(bf, "Name:", 5) == 0) {
58 char *name = bf + 5; 79 char *name = bf + 5;
59 while (*name && isspace(*name)) 80 while (*name && isspace(*name))
60 ++name; 81 ++name;
61 size = strlen(name) - 1; 82 size = strlen(name) - 1;
62 memcpy(ev.comm.comm, name, size++); 83 memcpy(event->comm.comm, name, size++);
63 } else if (memcmp(bf, "Tgid:", 5) == 0) { 84 } else if (memcmp(bf, "Tgid:", 5) == 0) {
64 char *tgids = bf + 5; 85 char *tgids = bf + 5;
65 while (*tgids && isspace(*tgids)) 86 while (*tgids && isspace(*tgids))
66 ++tgids; 87 ++tgids;
67 tgid = ev.comm.pid = atoi(tgids); 88 tgid = event->comm.pid = atoi(tgids);
68 } 89 }
69 } 90 }
70 91
71 ev.comm.header.type = PERF_RECORD_COMM; 92 event->comm.header.type = PERF_RECORD_COMM;
72 size = ALIGN(size, sizeof(u64)); 93 size = ALIGN(size, sizeof(u64));
73 ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size); 94 memset(event->comm.comm + size, 0, session->id_hdr_size);
74 95 event->comm.header.size = (sizeof(event->comm) -
96 (sizeof(event->comm.comm) - size) +
97 session->id_hdr_size);
75 if (!full) { 98 if (!full) {
76 ev.comm.tid = pid; 99 event->comm.tid = pid;
77 100
78 process(&ev, session); 101 process(event, &synth_sample, session);
79 goto out_fclose; 102 goto out;
80 } 103 }
81 104
82 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 105 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -91,22 +114,19 @@ out_race:
91 if (*end) 114 if (*end)
92 continue; 115 continue;
93 116
94 ev.comm.tid = pid; 117 event->comm.tid = pid;
95 118
96 process(&ev, session); 119 process(event, &synth_sample, session);
97 } 120 }
98 closedir(tasks);
99 121
100out_fclose: 122 closedir(tasks);
123out:
101 fclose(fp); 124 fclose(fp);
102 return tgid;
103 125
104out_failure: 126 return tgid;
105 pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
106 return -1;
107} 127}
108 128
109static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, 129static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
110 event__handler_t process, 130 event__handler_t process,
111 struct perf_session *session) 131 struct perf_session *session)
112{ 132{
@@ -124,29 +144,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
124 return -1; 144 return -1;
125 } 145 }
126 146
147 event->header.type = PERF_RECORD_MMAP;
148 /*
149 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
150 */
151 event->header.misc = PERF_RECORD_MISC_USER;
152
127 while (1) { 153 while (1) {
128 char bf[BUFSIZ], *pbf = bf; 154 char bf[BUFSIZ], *pbf = bf;
129 event_t ev = {
130 .header = {
131 .type = PERF_RECORD_MMAP,
132 /*
133 * Just like the kernel, see __perf_event_mmap
134 * in kernel/perf_event.c
135 */
136 .misc = PERF_RECORD_MISC_USER,
137 },
138 };
139 int n; 155 int n;
140 size_t size; 156 size_t size;
141 if (fgets(bf, sizeof(bf), fp) == NULL) 157 if (fgets(bf, sizeof(bf), fp) == NULL)
142 break; 158 break;
143 159
144 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ 160 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
145 n = hex2u64(pbf, &ev.mmap.start); 161 n = hex2u64(pbf, &event->mmap.start);
146 if (n < 0) 162 if (n < 0)
147 continue; 163 continue;
148 pbf += n + 1; 164 pbf += n + 1;
149 n = hex2u64(pbf, &ev.mmap.len); 165 n = hex2u64(pbf, &event->mmap.len);
150 if (n < 0) 166 if (n < 0)
151 continue; 167 continue;
152 pbf += n + 3; 168 pbf += n + 3;
@@ -161,19 +177,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
161 continue; 177 continue;
162 178
163 pbf += 3; 179 pbf += 3;
164 n = hex2u64(pbf, &ev.mmap.pgoff); 180 n = hex2u64(pbf, &event->mmap.pgoff);
165 181
166 size = strlen(execname); 182 size = strlen(execname);
167 execname[size - 1] = '\0'; /* Remove \n */ 183 execname[size - 1] = '\0'; /* Remove \n */
168 memcpy(ev.mmap.filename, execname, size); 184 memcpy(event->mmap.filename, execname, size);
169 size = ALIGN(size, sizeof(u64)); 185 size = ALIGN(size, sizeof(u64));
170 ev.mmap.len -= ev.mmap.start; 186 event->mmap.len -= event->mmap.start;
171 ev.mmap.header.size = (sizeof(ev.mmap) - 187 event->mmap.header.size = (sizeof(event->mmap) -
172 (sizeof(ev.mmap.filename) - size)); 188 (sizeof(event->mmap.filename) - size));
173 ev.mmap.pid = tgid; 189 memset(event->mmap.filename + size, 0, session->id_hdr_size);
174 ev.mmap.tid = pid; 190 event->mmap.header.size += session->id_hdr_size;
175 191 event->mmap.pid = tgid;
176 process(&ev, session); 192 event->mmap.tid = pid;
193
194 process(event, &synth_sample, session);
177 } 195 }
178 } 196 }
179 197
@@ -187,20 +205,27 @@ int event__synthesize_modules(event__handler_t process,
187{ 205{
188 struct rb_node *nd; 206 struct rb_node *nd;
189 struct map_groups *kmaps = &machine->kmaps; 207 struct map_groups *kmaps = &machine->kmaps;
190 u16 misc; 208 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
209
210 if (event == NULL) {
211 pr_debug("Not enough memory synthesizing mmap event "
212 "for kernel modules\n");
213 return -1;
214 }
215
216 event->header.type = PERF_RECORD_MMAP;
191 217
192 /* 218 /*
193 * kernel uses 0 for user space maps, see kernel/perf_event.c 219 * kernel uses 0 for user space maps, see kernel/perf_event.c
194 * __perf_event_mmap 220 * __perf_event_mmap
195 */ 221 */
196 if (machine__is_host(machine)) 222 if (machine__is_host(machine))
197 misc = PERF_RECORD_MISC_KERNEL; 223 event->header.misc = PERF_RECORD_MISC_KERNEL;
198 else 224 else
199 misc = PERF_RECORD_MISC_GUEST_KERNEL; 225 event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
200 226
201 for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]); 227 for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
202 nd; nd = rb_next(nd)) { 228 nd; nd = rb_next(nd)) {
203 event_t ev;
204 size_t size; 229 size_t size;
205 struct map *pos = rb_entry(nd, struct map, rb_node); 230 struct map *pos = rb_entry(nd, struct map, rb_node);
206 231
@@ -208,39 +233,78 @@ int event__synthesize_modules(event__handler_t process,
208 continue; 233 continue;
209 234
210 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); 235 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
211 memset(&ev, 0, sizeof(ev)); 236 event->mmap.header.type = PERF_RECORD_MMAP;
212 ev.mmap.header.misc = misc; 237 event->mmap.header.size = (sizeof(event->mmap) -
213 ev.mmap.header.type = PERF_RECORD_MMAP; 238 (sizeof(event->mmap.filename) - size));
214 ev.mmap.header.size = (sizeof(ev.mmap) - 239 memset(event->mmap.filename + size, 0, session->id_hdr_size);
215 (sizeof(ev.mmap.filename) - size)); 240 event->mmap.header.size += session->id_hdr_size;
216 ev.mmap.start = pos->start; 241 event->mmap.start = pos->start;
217 ev.mmap.len = pos->end - pos->start; 242 event->mmap.len = pos->end - pos->start;
218 ev.mmap.pid = machine->pid; 243 event->mmap.pid = machine->pid;
219 244
220 memcpy(ev.mmap.filename, pos->dso->long_name, 245 memcpy(event->mmap.filename, pos->dso->long_name,
221 pos->dso->long_name_len + 1); 246 pos->dso->long_name_len + 1);
222 process(&ev, session); 247 process(event, &synth_sample, session);
223 } 248 }
224 249
250 free(event);
225 return 0; 251 return 0;
226} 252}
227 253
228int event__synthesize_thread(pid_t pid, event__handler_t process, 254static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
229 struct perf_session *session) 255 pid_t pid, event__handler_t process,
256 struct perf_session *session)
230{ 257{
231 pid_t tgid = event__synthesize_comm(pid, 1, process, session); 258 pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
259 session);
232 if (tgid == -1) 260 if (tgid == -1)
233 return -1; 261 return -1;
234 return event__synthesize_mmap_events(pid, tgid, process, session); 262 return event__synthesize_mmap_events(mmap_event, pid, tgid,
263 process, session);
264}
265
266int event__synthesize_thread(pid_t pid, event__handler_t process,
267 struct perf_session *session)
268{
269 event_t *comm_event, *mmap_event;
270 int err = -1;
271
272 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
273 if (comm_event == NULL)
274 goto out;
275
276 mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
277 if (mmap_event == NULL)
278 goto out_free_comm;
279
280 err = __event__synthesize_thread(comm_event, mmap_event, pid,
281 process, session);
282 free(mmap_event);
283out_free_comm:
284 free(comm_event);
285out:
286 return err;
235} 287}
236 288
237void event__synthesize_threads(event__handler_t process, 289int event__synthesize_threads(event__handler_t process,
238 struct perf_session *session) 290 struct perf_session *session)
239{ 291{
240 DIR *proc; 292 DIR *proc;
241 struct dirent dirent, *next; 293 struct dirent dirent, *next;
294 event_t *comm_event, *mmap_event;
295 int err = -1;
296
297 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
298 if (comm_event == NULL)
299 goto out;
300
301 mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
302 if (mmap_event == NULL)
303 goto out_free_comm;
242 304
243 proc = opendir("/proc"); 305 proc = opendir("/proc");
306 if (proc == NULL)
307 goto out_free_mmap;
244 308
245 while (!readdir_r(proc, &dirent, &next) && next) { 309 while (!readdir_r(proc, &dirent, &next) && next) {
246 char *end; 310 char *end;
@@ -249,10 +313,18 @@ void event__synthesize_threads(event__handler_t process,
249 if (*end) /* only interested in proper numerical dirents */ 313 if (*end) /* only interested in proper numerical dirents */
250 continue; 314 continue;
251 315
252 event__synthesize_thread(pid, process, session); 316 __event__synthesize_thread(comm_event, mmap_event, pid,
317 process, session);
253 } 318 }
254 319
255 closedir(proc); 320 closedir(proc);
321 err = 0;
322out_free_mmap:
323 free(mmap_event);
324out_free_comm:
325 free(comm_event);
326out:
327 return err;
256} 328}
257 329
258struct process_symbol_args { 330struct process_symbol_args {
@@ -260,7 +332,8 @@ struct process_symbol_args {
260 u64 start; 332 u64 start;
261}; 333};
262 334
263static int find_symbol_cb(void *arg, const char *name, char type, u64 start) 335static int find_symbol_cb(void *arg, const char *name, char type,
336 u64 start, u64 end __used)
264{ 337{
265 struct process_symbol_args *args = arg; 338 struct process_symbol_args *args = arg;
266 339
@@ -286,18 +359,20 @@ int event__synthesize_kernel_mmap(event__handler_t process,
286 char path[PATH_MAX]; 359 char path[PATH_MAX];
287 char name_buff[PATH_MAX]; 360 char name_buff[PATH_MAX];
288 struct map *map; 361 struct map *map;
289 362 int err;
290 event_t ev = {
291 .header = {
292 .type = PERF_RECORD_MMAP,
293 },
294 };
295 /* 363 /*
296 * We should get this from /sys/kernel/sections/.text, but till that is 364 * We should get this from /sys/kernel/sections/.text, but till that is
297 * available use this, and after it is use this as a fallback for older 365 * available use this, and after it is use this as a fallback for older
298 * kernels. 366 * kernels.
299 */ 367 */
300 struct process_symbol_args args = { .name = symbol_name, }; 368 struct process_symbol_args args = { .name = symbol_name, };
369 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
370
371 if (event == NULL) {
372 pr_debug("Not enough memory synthesizing mmap event "
373 "for kernel modules\n");
374 return -1;
375 }
301 376
302 mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff)); 377 mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
303 if (machine__is_host(machine)) { 378 if (machine__is_host(machine)) {
@@ -305,10 +380,10 @@ int event__synthesize_kernel_mmap(event__handler_t process,
305 * kernel uses PERF_RECORD_MISC_USER for user space maps, 380 * kernel uses PERF_RECORD_MISC_USER for user space maps,
306 * see kernel/perf_event.c __perf_event_mmap 381 * see kernel/perf_event.c __perf_event_mmap
307 */ 382 */
308 ev.header.misc = PERF_RECORD_MISC_KERNEL; 383 event->header.misc = PERF_RECORD_MISC_KERNEL;
309 filename = "/proc/kallsyms"; 384 filename = "/proc/kallsyms";
310 } else { 385 } else {
311 ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL; 386 event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
312 if (machine__is_default_guest(machine)) 387 if (machine__is_default_guest(machine))
313 filename = (char *) symbol_conf.default_guest_kallsyms; 388 filename = (char *) symbol_conf.default_guest_kallsyms;
314 else { 389 else {
@@ -321,17 +396,21 @@ int event__synthesize_kernel_mmap(event__handler_t process,
321 return -ENOENT; 396 return -ENOENT;
322 397
323 map = machine->vmlinux_maps[MAP__FUNCTION]; 398 map = machine->vmlinux_maps[MAP__FUNCTION];
324 size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename), 399 size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
325 "%s%s", mmap_name, symbol_name) + 1; 400 "%s%s", mmap_name, symbol_name) + 1;
326 size = ALIGN(size, sizeof(u64)); 401 size = ALIGN(size, sizeof(u64));
327 ev.mmap.header.size = (sizeof(ev.mmap) - 402 event->mmap.header.type = PERF_RECORD_MMAP;
328 (sizeof(ev.mmap.filename) - size)); 403 event->mmap.header.size = (sizeof(event->mmap) -
329 ev.mmap.pgoff = args.start; 404 (sizeof(event->mmap.filename) - size) + session->id_hdr_size);
330 ev.mmap.start = map->start; 405 event->mmap.pgoff = args.start;
331 ev.mmap.len = map->end - ev.mmap.start; 406 event->mmap.start = map->start;
332 ev.mmap.pid = machine->pid; 407 event->mmap.len = map->end - event->mmap.start;
333 408 event->mmap.pid = machine->pid;
334 return process(&ev, session); 409
410 err = process(event, &synth_sample, session);
411 free(event);
412
413 return err;
335} 414}
336 415
337static void thread__comm_adjust(struct thread *self, struct hists *hists) 416static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -361,7 +440,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm,
361 return 0; 440 return 0;
362} 441}
363 442
364int event__process_comm(event_t *self, struct perf_session *session) 443int event__process_comm(event_t *self, struct sample_data *sample __used,
444 struct perf_session *session)
365{ 445{
366 struct thread *thread = perf_session__findnew(session, self->comm.tid); 446 struct thread *thread = perf_session__findnew(session, self->comm.tid);
367 447
@@ -376,7 +456,8 @@ int event__process_comm(event_t *self, struct perf_session *session)
376 return 0; 456 return 0;
377} 457}
378 458
379int event__process_lost(event_t *self, struct perf_session *session) 459int event__process_lost(event_t *self, struct sample_data *sample __used,
460 struct perf_session *session)
380{ 461{
381 dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost); 462 dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
382 session->hists.stats.total_lost += self->lost.lost; 463 session->hists.stats.total_lost += self->lost.lost;
@@ -392,7 +473,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
392 * a zero sized synthesized MMAP event for the kernel. 473 * a zero sized synthesized MMAP event for the kernel.
393 */ 474 */
394 if (maps[MAP__FUNCTION]->end == 0) 475 if (maps[MAP__FUNCTION]->end == 0)
395 maps[MAP__FUNCTION]->end = ~0UL; 476 maps[MAP__FUNCTION]->end = ~0ULL;
396} 477}
397 478
398static int event__process_kernel_mmap(event_t *self, 479static int event__process_kernel_mmap(event_t *self,
@@ -485,7 +566,8 @@ out_problem:
485 return -1; 566 return -1;
486} 567}
487 568
488int event__process_mmap(event_t *self, struct perf_session *session) 569int event__process_mmap(event_t *self, struct sample_data *sample __used,
570 struct perf_session *session)
489{ 571{
490 struct machine *machine; 572 struct machine *machine;
491 struct thread *thread; 573 struct thread *thread;
@@ -526,7 +608,8 @@ out_problem:
526 return 0; 608 return 0;
527} 609}
528 610
529int event__process_task(event_t *self, struct perf_session *session) 611int event__process_task(event_t *self, struct sample_data *sample __used,
612 struct perf_session *session)
530{ 613{
531 struct thread *thread = perf_session__findnew(session, self->fork.tid); 614 struct thread *thread = perf_session__findnew(session, self->fork.tid);
532 struct thread *parent = perf_session__findnew(session, self->fork.ptid); 615 struct thread *parent = perf_session__findnew(session, self->fork.ptid);
@@ -548,18 +631,19 @@ int event__process_task(event_t *self, struct perf_session *session)
548 return 0; 631 return 0;
549} 632}
550 633
551int event__process(event_t *event, struct perf_session *session) 634int event__process(event_t *event, struct sample_data *sample,
635 struct perf_session *session)
552{ 636{
553 switch (event->header.type) { 637 switch (event->header.type) {
554 case PERF_RECORD_COMM: 638 case PERF_RECORD_COMM:
555 event__process_comm(event, session); 639 event__process_comm(event, sample, session);
556 break; 640 break;
557 case PERF_RECORD_MMAP: 641 case PERF_RECORD_MMAP:
558 event__process_mmap(event, session); 642 event__process_mmap(event, sample, session);
559 break; 643 break;
560 case PERF_RECORD_FORK: 644 case PERF_RECORD_FORK:
561 case PERF_RECORD_EXIT: 645 case PERF_RECORD_EXIT:
562 event__process_task(event, session); 646 event__process_task(event, sample, session);
563 break; 647 break;
564 default: 648 default:
565 break; 649 break;
@@ -674,32 +758,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
674 symbol_filter_t filter) 758 symbol_filter_t filter)
675{ 759{
676 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 760 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
677 struct thread *thread; 761 struct thread *thread = perf_session__findnew(session, self->ip.pid);
678
679 event__parse_sample(self, session->sample_type, data);
680
681 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
682 self->header.misc, data->pid, data->tid, data->ip,
683 data->period, data->cpu);
684
685 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
686 unsigned int i;
687
688 dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
689 762
690 if (!ip_callchain__valid(data->callchain, self)) {
691 pr_debug("call-chain problem with event, "
692 "skipping it.\n");
693 goto out_filtered;
694 }
695
696 if (dump_trace) {
697 for (i = 0; i < data->callchain->nr; i++)
698 dump_printf("..... %2d: %016Lx\n",
699 i, data->callchain->ips[i]);
700 }
701 }
702 thread = perf_session__findnew(session, self->ip.pid);
703 if (thread == NULL) 763 if (thread == NULL)
704 return -1; 764 return -1;
705 765
@@ -766,9 +826,65 @@ out_filtered:
766 return 0; 826 return 0;
767} 827}
768 828
769int event__parse_sample(const event_t *event, u64 type, struct sample_data *data) 829static int event__parse_id_sample(const event_t *event,
830 struct perf_session *session,
831 struct sample_data *sample)
770{ 832{
771 const u64 *array = event->sample.array; 833 const u64 *array;
834 u64 type;
835
836 sample->cpu = sample->pid = sample->tid = -1;
837 sample->stream_id = sample->id = sample->time = -1ULL;
838
839 if (!session->sample_id_all)
840 return 0;
841
842 array = event->sample.array;
843 array += ((event->header.size -
844 sizeof(event->header)) / sizeof(u64)) - 1;
845 type = session->sample_type;
846
847 if (type & PERF_SAMPLE_CPU) {
848 u32 *p = (u32 *)array;
849 sample->cpu = *p;
850 array--;
851 }
852
853 if (type & PERF_SAMPLE_STREAM_ID) {
854 sample->stream_id = *array;
855 array--;
856 }
857
858 if (type & PERF_SAMPLE_ID) {
859 sample->id = *array;
860 array--;
861 }
862
863 if (type & PERF_SAMPLE_TIME) {
864 sample->time = *array;
865 array--;
866 }
867
868 if (type & PERF_SAMPLE_TID) {
869 u32 *p = (u32 *)array;
870 sample->pid = p[0];
871 sample->tid = p[1];
872 }
873
874 return 0;
875}
876
877int event__parse_sample(const event_t *event, struct perf_session *session,
878 struct sample_data *data)
879{
880 const u64 *array;
881 u64 type;
882
883 if (event->header.type != PERF_RECORD_SAMPLE)
884 return event__parse_id_sample(event, session, data);
885
886 array = event->sample.array;
887 type = session->sample_type;
772 888
773 if (type & PERF_SAMPLE_IP) { 889 if (type & PERF_SAMPLE_IP) {
774 data->ip = event->ip.ip; 890 data->ip = event->ip.ip;
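
The event__parse_id_sample() helper above walks the trailing sample_id_all block backwards from the end of the record, which is also why the synthesized MMAP sizing earlier adds session->id_hdr_size. A minimal sketch of computing that trailer size, assuming only the PERF_SAMPLE_* flags from linux/perf_event.h; the helper name is illustrative and not part of this patch:

	#include <stdint.h>
	#include <linux/perf_event.h>

	/* Illustrative helper (not in this patch): bytes occupied by the
	 * sample_id_all trailer appended to non-sample records, in the
	 * same field order event__parse_id_sample() consumes in reverse. */
	static uint64_t sample_id_hdr_size(uint64_t sample_type)
	{
		uint64_t size = 0;

		if (sample_type & PERF_SAMPLE_TID)
			size += sizeof(uint64_t);	/* u32 pid, tid */
		if (sample_type & PERF_SAMPLE_TIME)
			size += sizeof(uint64_t);
		if (sample_type & PERF_SAMPLE_ID)
			size += sizeof(uint64_t);
		if (sample_type & PERF_SAMPLE_STREAM_ID)
			size += sizeof(uint64_t);
		if (sample_type & PERF_SAMPLE_CPU)
			size += sizeof(uint64_t);	/* u32 cpu, res */

		return size;
	}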
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8e790dae7026..2b7e91902f10 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -85,6 +85,7 @@ struct build_id_event {
85}; 85};
86 86
87enum perf_user_event_type { /* above any possible kernel type */ 87enum perf_user_event_type { /* above any possible kernel type */
88 PERF_RECORD_USER_TYPE_START = 64,
88 PERF_RECORD_HEADER_ATTR = 64, 89 PERF_RECORD_HEADER_ATTR = 64,
89 PERF_RECORD_HEADER_EVENT_TYPE = 65, 90 PERF_RECORD_HEADER_EVENT_TYPE = 65,
90 PERF_RECORD_HEADER_TRACING_DATA = 66, 91 PERF_RECORD_HEADER_TRACING_DATA = 66,
@@ -135,12 +136,15 @@ void event__print_totals(void);
135 136
136struct perf_session; 137struct perf_session;
137 138
138typedef int (*event__handler_t)(event_t *event, struct perf_session *session); 139typedef int (*event__handler_synth_t)(event_t *event,
140 struct perf_session *session);
141typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
142 struct perf_session *session);
139 143
140int event__synthesize_thread(pid_t pid, event__handler_t process, 144int event__synthesize_thread(pid_t pid, event__handler_t process,
141 struct perf_session *session); 145 struct perf_session *session);
142void event__synthesize_threads(event__handler_t process, 146int event__synthesize_threads(event__handler_t process,
143 struct perf_session *session); 147 struct perf_session *session);
144int event__synthesize_kernel_mmap(event__handler_t process, 148int event__synthesize_kernel_mmap(event__handler_t process,
145 struct perf_session *session, 149 struct perf_session *session,
146 struct machine *machine, 150 struct machine *machine,
@@ -150,18 +154,24 @@ int event__synthesize_modules(event__handler_t process,
150 struct perf_session *session, 154 struct perf_session *session,
151 struct machine *machine); 155 struct machine *machine);
152 156
153int event__process_comm(event_t *self, struct perf_session *session); 157int event__process_comm(event_t *self, struct sample_data *sample,
154int event__process_lost(event_t *self, struct perf_session *session); 158 struct perf_session *session);
155int event__process_mmap(event_t *self, struct perf_session *session); 159int event__process_lost(event_t *self, struct sample_data *sample,
156int event__process_task(event_t *self, struct perf_session *session); 160 struct perf_session *session);
157int event__process(event_t *event, struct perf_session *session); 161int event__process_mmap(event_t *self, struct sample_data *sample,
162 struct perf_session *session);
163int event__process_task(event_t *self, struct sample_data *sample,
164 struct perf_session *session);
165int event__process(event_t *event, struct sample_data *sample,
166 struct perf_session *session);
158 167
159struct addr_location; 168struct addr_location;
160int event__preprocess_sample(const event_t *self, struct perf_session *session, 169int event__preprocess_sample(const event_t *self, struct perf_session *session,
161 struct addr_location *al, struct sample_data *data, 170 struct addr_location *al, struct sample_data *data,
162 symbol_filter_t filter); 171 symbol_filter_t filter);
163int event__parse_sample(const event_t *event, u64 type, struct sample_data *data); 172int event__parse_sample(const event_t *event, struct perf_session *session,
173 struct sample_data *sample);
164 174
165extern const char *event__name[]; 175const char *event__get_event_name(unsigned int id);
166 176
167#endif /* __PERF_RECORD_H */ 177#endif /* __PERF_RECORD_H */
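
Every event__handler_t consumer has to adopt the widened signature above. A sketch of a conforming callback, not taken from this patch; the counting logic is illustrative only:

	/* Illustrative callback (not in this patch) matching the new
	 * event__handler_t signature: records now arrive with an explicit
	 * sample pointer instead of each handler re-parsing the event. */
	static int count_mmap_events(event_t *event, struct sample_data *sample __used,
				     struct perf_session *session)
	{
		if (event->header.type == PERF_RECORD_MMAP)
			session->hists.stats.nr_events[PERF_RECORD_MMAP]++;

		return 0;
	}

It would be passed wherever an event__handler_t is expected, e.g. event__synthesize_threads(count_mmap_events, session).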
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
new file mode 100644
index 000000000000..c95267e63c5b
--- /dev/null
+++ b/tools/perf/util/evsel.c
@@ -0,0 +1,186 @@
1#include "evsel.h"
2#include "../perf.h"
3#include "util.h"
4#include "cpumap.h"
5#include "thread.h"
6
7#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
8
9struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
10{
11 struct perf_evsel *evsel = zalloc(sizeof(*evsel));
12
13 if (evsel != NULL) {
14 evsel->idx = idx;
15 evsel->attr.type = type;
16 evsel->attr.config = config;
17 INIT_LIST_HEAD(&evsel->node);
18 }
19
20 return evsel;
21}
22
23int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
24{
25 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
26 return evsel->fd != NULL ? 0 : -ENOMEM;
27}
28
29int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
30{
31 evsel->counts = zalloc((sizeof(*evsel->counts) +
32 (ncpus * sizeof(struct perf_counts_values))));
33 return evsel->counts != NULL ? 0 : -ENOMEM;
34}
35
36void perf_evsel__free_fd(struct perf_evsel *evsel)
37{
38 xyarray__delete(evsel->fd);
39 evsel->fd = NULL;
40}
41
42void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
43{
44 int cpu, thread;
45
46 for (cpu = 0; cpu < ncpus; cpu++)
47 for (thread = 0; thread < nthreads; ++thread) {
48 close(FD(evsel, cpu, thread));
49 FD(evsel, cpu, thread) = -1;
50 }
51}
52
53void perf_evsel__delete(struct perf_evsel *evsel)
54{
55 assert(list_empty(&evsel->node));
56 xyarray__delete(evsel->fd);
57 free(evsel);
58}
59
60int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
61 int cpu, int thread, bool scale)
62{
63 struct perf_counts_values count;
64 size_t nv = scale ? 3 : 1;
65
66 if (FD(evsel, cpu, thread) < 0)
67 return -EINVAL;
68
69 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
70 return -ENOMEM;
71
72 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
73 return -errno;
74
75 if (scale) {
76 if (count.run == 0)
77 count.val = 0;
78 else if (count.run < count.ena)
79 count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
80 } else
81 count.ena = count.run = 0;
82
83 evsel->counts->cpu[cpu] = count;
84 return 0;
85}
86
87int __perf_evsel__read(struct perf_evsel *evsel,
88 int ncpus, int nthreads, bool scale)
89{
90 size_t nv = scale ? 3 : 1;
91 int cpu, thread;
92 struct perf_counts_values *aggr = &evsel->counts->aggr, count;
93
94 aggr->val = 0;
95
96 for (cpu = 0; cpu < ncpus; cpu++) {
97 for (thread = 0; thread < nthreads; thread++) {
98 if (FD(evsel, cpu, thread) < 0)
99 continue;
100
101 if (readn(FD(evsel, cpu, thread),
102 &count, nv * sizeof(u64)) < 0)
103 return -errno;
104
105 aggr->val += count.val;
106 if (scale) {
107 aggr->ena += count.ena;
108 aggr->run += count.run;
109 }
110 }
111 }
112
113 evsel->counts->scaled = 0;
114 if (scale) {
115 if (aggr->run == 0) {
116 evsel->counts->scaled = -1;
117 aggr->val = 0;
118 return 0;
119 }
120
121 if (aggr->run < aggr->ena) {
122 evsel->counts->scaled = 1;
123 aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
124 }
125 } else
126 aggr->ena = aggr->run = 0;
127
128 return 0;
129}
130
131int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
132{
133 int cpu;
134
135 if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
136 return -1;
137
138 for (cpu = 0; cpu < cpus->nr; cpu++) {
139 FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
140 cpus->map[cpu], -1, 0);
141 if (FD(evsel, cpu, 0) < 0)
142 goto out_close;
143 }
144
145 return 0;
146
147out_close:
148 while (--cpu >= 0) {
149 close(FD(evsel, cpu, 0));
150 FD(evsel, cpu, 0) = -1;
151 }
152 return -1;
153}
154
155int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
156{
157 int thread;
158
159 if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
160 return -1;
161
162 for (thread = 0; thread < threads->nr; thread++) {
163 FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
164 threads->map[thread], -1, -1, 0);
165 if (FD(evsel, 0, thread) < 0)
166 goto out_close;
167 }
168
169 return 0;
170
171out_close:
172 while (--thread >= 0) {
173 close(FD(evsel, 0, thread));
174 FD(evsel, 0, thread) = -1;
175 }
176 return -1;
177}
178
179int perf_evsel__open(struct perf_evsel *evsel,
180 struct cpu_map *cpus, struct thread_map *threads)
181{
182 if (threads == NULL)
183 return perf_evsel__open_per_cpu(evsel, cpus);
184
185 return perf_evsel__open_per_thread(evsel, threads);
186}
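
A usage sketch for the new evsel API, under the assumption that a struct thread_map has been built elsewhere (its constructor is not shown in this patch); error paths are abbreviated:

	#include <inttypes.h>
	#include <stdio.h>
	#include "evsel.h"

	/* Sketch (not in this patch): count CPU cycles across a thread map
	 * with the new perf_evsel API. */
	static void count_cycles(struct thread_map *threads, int nthreads)
	{
		struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
							   PERF_COUNT_HW_CPU_CYCLES, 0);

		if (evsel == NULL)
			return;

		if (perf_evsel__open_per_thread(evsel, threads) == 0 &&
		    perf_evsel__alloc_counts(evsel, 1) == 0) {
			/* ... run the workload to be measured ... */
			if (perf_evsel__read(evsel, 1, nthreads) == 0)
				printf("cycles: %" PRIu64 "\n",
				       evsel->counts->aggr.val);
			perf_evsel__close_fd(evsel, 1, nthreads);
		}

		perf_evsel__delete(evsel);
	}

Note that per-thread counters use an fd array of shape 1 x nthreads, so the aggregate read passes ncpus = 1.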
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
new file mode 100644
index 000000000000..a0ccd69c3fc2
--- /dev/null
+++ b/tools/perf/util/evsel.h
@@ -0,0 +1,115 @@
1#ifndef __PERF_EVSEL_H
2#define __PERF_EVSEL_H 1
3
4#include <linux/list.h>
5#include <stdbool.h>
6#include "../../../include/linux/perf_event.h"
7#include "types.h"
8#include "xyarray.h"
9
10struct perf_counts_values {
11 union {
12 struct {
13 u64 val;
14 u64 ena;
15 u64 run;
16 };
17 u64 values[3];
18 };
19};
20
21struct perf_counts {
22 s8 scaled;
23 struct perf_counts_values aggr;
24 struct perf_counts_values cpu[];
25};
26
27struct perf_evsel {
28 struct list_head node;
29 struct perf_event_attr attr;
30 char *filter;
31 struct xyarray *fd;
32 struct perf_counts *counts;
33 int idx;
34 void *priv;
35};
36
37struct cpu_map;
38struct thread_map;
39
40struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
41void perf_evsel__delete(struct perf_evsel *evsel);
42
43int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
44int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
45void perf_evsel__free_fd(struct perf_evsel *evsel);
46void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
47
48int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus);
49int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads);
50int perf_evsel__open(struct perf_evsel *evsel,
51 struct cpu_map *cpus, struct thread_map *threads);
52
53#define perf_evsel__match(evsel, t, c) \
54 (evsel->attr.type == PERF_TYPE_##t && \
55 evsel->attr.config == PERF_COUNT_##c)
56
57int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
58 int cpu, int thread, bool scale);
59
60/**
61 * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
62 *
63 * @evsel - event selector to read value
64 * @cpu - CPU of interest
65 * @thread - thread of interest
66 */
67static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
68 int cpu, int thread)
69{
70 return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
71}
72
73/**
74 * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
75 *
76 * @evsel - event selector to read value
77 * @cpu - CPU of interest
78 * @thread - thread of interest
79 */
80static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
81 int cpu, int thread)
82{
83 return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
84}
85
86int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
87 bool scale);
88
89/**
90 * perf_evsel__read - Read the aggregate results on all CPUs
91 *
92 * @evsel - event selector to read value
93 * @ncpus - Number of cpus affected, from zero
94 * @nthreads - Number of threads affected, from zero
95 */
96static inline int perf_evsel__read(struct perf_evsel *evsel,
97 int ncpus, int nthreads)
98{
99 return __perf_evsel__read(evsel, ncpus, nthreads, false);
100}
101
102/**
103 * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
104 *
105 * @evsel - event selector to read value
106 * @ncpus - Number of cpus affected, from zero
107 * @nthreads - Number of threads affected, from zero
108 */
109static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
110 int ncpus, int nthreads)
111{
112 return __perf_evsel__read(evsel, ncpus, nthreads, true);
113}
114
115#endif /* __PERF_EVSEL_H */
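
The scale = true paths above compensate for counter multiplexing: when the kernel could schedule the event for only part of the time it was enabled, the raw value is extrapolated as val * ena / run, rounded to nearest. A worked example, not from the patch:

	/* Worked example (not in this patch), using the u64 typedef from
	 * types.h: a counter enabled for 200000ns but scheduled in for only
	 * 100000ns has its raw count of 1000 extrapolated to 2000. */
	static u64 scale_example(void)
	{
		u64 val = 1000, ena = 200000, run = 100000;

		return (u64)((double)val * ena / run + 0.5);	/* == 2000 */
	}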
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 64a85bafde63..989fa2dee2fd 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat)
152 set_bit(feat, self->adds_features); 152 set_bit(feat, self->adds_features);
153} 153}
154 154
155void perf_header__clear_feat(struct perf_header *self, int feat)
156{
157 clear_bit(feat, self->adds_features);
158}
159
155bool perf_header__has_feat(const struct perf_header *self, int feat) 160bool perf_header__has_feat(const struct perf_header *self, int feat)
156{ 161{
157 return test_bit(feat, self->adds_features); 162 return test_bit(feat, self->adds_features);
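
With clear_feat() added alongside set_feat()/has_feat(), a feature bit can be set optimistically at record setup and dropped at write time if it cannot be satisfied. A sketch, not from the patch; the have_build_ids flag is a hypothetical stand-in for perf_session__read_build_ids() succeeding:

	/* Sketch (not in this patch): optimistic set, conditional clear. */
	static void feat_example(struct perf_session *session, bool have_build_ids)
	{
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

		/* ... later, just before writing the header ... */
		if (!have_build_ids)
			perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
	}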
@@ -265,15 +270,16 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
265 const char *name, bool is_kallsyms) 270 const char *name, bool is_kallsyms)
266{ 271{
267 const size_t size = PATH_MAX; 272 const size_t size = PATH_MAX;
268 char *filename = malloc(size), 273 char *realname = realpath(name, NULL),
274 *filename = malloc(size),
269 *linkname = malloc(size), *targetname; 275 *linkname = malloc(size), *targetname;
270 int len, err = -1; 276 int len, err = -1;
271 277
272 if (filename == NULL || linkname == NULL) 278 if (realname == NULL || filename == NULL || linkname == NULL)
273 goto out_free; 279 goto out_free;
274 280
275 len = snprintf(filename, size, "%s%s%s", 281 len = snprintf(filename, size, "%s%s%s",
276 debugdir, is_kallsyms ? "/" : "", name); 282 debugdir, is_kallsyms ? "/" : "", realname);
277 if (mkdir_p(filename, 0755)) 283 if (mkdir_p(filename, 0755))
278 goto out_free; 284 goto out_free;
279 285
@@ -283,7 +289,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
283 if (is_kallsyms) { 289 if (is_kallsyms) {
284 if (copyfile("/proc/kallsyms", filename)) 290 if (copyfile("/proc/kallsyms", filename))
285 goto out_free; 291 goto out_free;
286 } else if (link(name, filename) && copyfile(name, filename)) 292 } else if (link(realname, filename) && copyfile(name, filename))
287 goto out_free; 293 goto out_free;
288 } 294 }
289 295
@@ -300,6 +306,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
300 if (symlink(targetname, linkname) == 0) 306 if (symlink(targetname, linkname) == 0)
301 err = 0; 307 err = 0;
302out_free: 308out_free:
309 free(realname);
303 free(filename); 310 free(filename);
304 free(linkname); 311 free(linkname);
305 return err; 312 return err;
@@ -431,8 +438,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
431 int idx = 0, err; 438 int idx = 0, err;
432 439
433 session = container_of(self, struct perf_session, header); 440 session = container_of(self, struct perf_session, header);
434 if (perf_session__read_build_ids(session, true)) 441
435 perf_header__set_feat(self, HEADER_BUILD_ID); 442 if (perf_header__has_feat(self, HEADER_BUILD_ID) &&
443 !perf_session__read_build_ids(session, true))
444 perf_header__clear_feat(self, HEADER_BUILD_ID);
436 445
437 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 446 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
438 if (!nr_sections) 447 if (!nr_sections)
@@ -454,7 +463,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
454 463
455 /* Write trace info */ 464 /* Write trace info */
456 trace_sec->offset = lseek(fd, 0, SEEK_CUR); 465 trace_sec->offset = lseek(fd, 0, SEEK_CUR);
457 read_tracing_data(fd, attrs, nr_counters); 466 read_tracing_data(fd, &evsel_list);
458 trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; 467 trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
459 } 468 }
460 469
@@ -597,7 +606,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
597static int perf_header__getbuffer64(struct perf_header *self, 606static int perf_header__getbuffer64(struct perf_header *self,
598 int fd, void *buf, size_t size) 607 int fd, void *buf, size_t size)
599{ 608{
600 if (do_read(fd, buf, size) <= 0) 609 if (readn(fd, buf, size) <= 0)
601 return -1; 610 return -1;
602 611
603 if (self->needs_swap) 612 if (self->needs_swap)
@@ -653,7 +662,7 @@ int perf_file_header__read(struct perf_file_header *self,
653{ 662{
654 lseek(fd, 0, SEEK_SET); 663 lseek(fd, 0, SEEK_SET);
655 664
656 if (do_read(fd, self, sizeof(*self)) <= 0 || 665 if (readn(fd, self, sizeof(*self)) <= 0 ||
657 memcmp(&self->magic, __perf_magic, sizeof(self->magic))) 666 memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
658 return -1; 667 return -1;
659 668
@@ -814,7 +823,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *self,
814 struct perf_header *ph, int fd, 823 struct perf_header *ph, int fd,
815 bool repipe) 824 bool repipe)
816{ 825{
817 if (do_read(fd, self, sizeof(*self)) <= 0 || 826 if (readn(fd, self, sizeof(*self)) <= 0 ||
818 memcmp(&self->magic, __perf_magic, sizeof(self->magic))) 827 memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
819 return -1; 828 return -1;
820 829
@@ -939,6 +948,24 @@ u64 perf_header__sample_type(struct perf_header *header)
939 return type; 948 return type;
940} 949}
941 950
951bool perf_header__sample_id_all(const struct perf_header *header)
952{
953 bool value = false, first = true;
954 int i;
955
956 for (i = 0; i < header->attrs; i++) {
957 struct perf_header_attr *attr = header->attr[i];
958
959 if (first) {
960 value = attr->attr.sample_id_all;
961 first = false;
962 } else if (value != attr->attr.sample_id_all)
963 die("non matching sample_id_all");
964 }
965
966 return value;
967}
968
942struct perf_event_attr * 969struct perf_event_attr *
943perf_header__find_attr(u64 id, struct perf_header *header) 970perf_header__find_attr(u64 id, struct perf_header *header)
944{ 971{
@@ -985,21 +1012,23 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
985 1012
986 ev = malloc(size); 1013 ev = malloc(size);
987 1014
1015 if (ev == NULL)
1016 return -ENOMEM;
1017
988 ev->attr.attr = *attr; 1018 ev->attr.attr = *attr;
989 memcpy(ev->attr.id, id, ids * sizeof(u64)); 1019 memcpy(ev->attr.id, id, ids * sizeof(u64));
990 1020
991 ev->attr.header.type = PERF_RECORD_HEADER_ATTR; 1021 ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
992 ev->attr.header.size = size; 1022 ev->attr.header.size = size;
993 1023
994 err = process(ev, session); 1024 err = process(ev, NULL, session);
995 1025
996 free(ev); 1026 free(ev);
997 1027
998 return err; 1028 return err;
999} 1029}
1000 1030
1001int event__synthesize_attrs(struct perf_header *self, 1031int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
1002 event__handler_t process,
1003 struct perf_session *session) 1032 struct perf_session *session)
1004{ 1033{
1005 struct perf_header_attr *attr; 1034 struct perf_header_attr *attr;
@@ -1069,7 +1098,7 @@ int event__synthesize_event_type(u64 event_id, char *name,
1069 ev.event_type.header.size = sizeof(ev.event_type) - 1098 ev.event_type.header.size = sizeof(ev.event_type) -
1070 (sizeof(ev.event_type.event_type.name) - size); 1099 (sizeof(ev.event_type.event_type.name) - size);
1071 1100
1072 err = process(&ev, session); 1101 err = process(&ev, NULL, session);
1073 1102
1074 return err; 1103 return err;
1075} 1104}
@@ -1104,8 +1133,7 @@ int event__process_event_type(event_t *self,
1104 return 0; 1133 return 0;
1105} 1134}
1106 1135
1107int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs, 1136int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
1108 int nb_events,
1109 event__handler_t process, 1137 event__handler_t process,
1110 struct perf_session *session __unused) 1138 struct perf_session *session __unused)
1111{ 1139{
@@ -1116,7 +1144,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
1116 memset(&ev, 0, sizeof(ev)); 1144 memset(&ev, 0, sizeof(ev));
1117 1145
1118 ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; 1146 ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
1119 size = read_tracing_data_size(fd, pattrs, nb_events); 1147 size = read_tracing_data_size(fd, pattrs);
1120 if (size <= 0) 1148 if (size <= 0)
1121 return size; 1149 return size;
1122 aligned_size = ALIGN(size, sizeof(u64)); 1150 aligned_size = ALIGN(size, sizeof(u64));
@@ -1124,9 +1152,9 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
1124 ev.tracing_data.header.size = sizeof(ev.tracing_data); 1152 ev.tracing_data.header.size = sizeof(ev.tracing_data);
1125 ev.tracing_data.size = aligned_size; 1153 ev.tracing_data.size = aligned_size;
1126 1154
1127 process(&ev, session); 1155 process(&ev, NULL, session);
1128 1156
1129 err = read_tracing_data(fd, pattrs, nb_events); 1157 err = read_tracing_data(fd, pattrs);
1130 write_padded(fd, NULL, 0, padding); 1158 write_padded(fd, NULL, 0, padding);
1131 1159
1132 return aligned_size; 1160 return aligned_size;
@@ -1184,7 +1212,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
1184 ev.build_id.header.size = sizeof(ev.build_id) + len; 1212 ev.build_id.header.size = sizeof(ev.build_id) + len;
1185 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); 1213 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
1186 1214
1187 err = process(&ev, session); 1215 err = process(&ev, NULL, session);
1188 1216
1189 return err; 1217 return err;
1190} 1218}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 402ac2454cf8..33f16be7b72f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -81,9 +81,11 @@ void perf_header_attr__delete(struct perf_header_attr *self);
81int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); 81int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
82 82
83u64 perf_header__sample_type(struct perf_header *header); 83u64 perf_header__sample_type(struct perf_header *header);
84bool perf_header__sample_id_all(const struct perf_header *header);
84struct perf_event_attr * 85struct perf_event_attr *
85perf_header__find_attr(u64 id, struct perf_header *header); 86perf_header__find_attr(u64 id, struct perf_header *header);
86void perf_header__set_feat(struct perf_header *self, int feat); 87void perf_header__set_feat(struct perf_header *self, int feat);
88void perf_header__clear_feat(struct perf_header *self, int feat);
87bool perf_header__has_feat(const struct perf_header *self, int feat); 89bool perf_header__has_feat(const struct perf_header *self, int feat);
88 90
89int perf_header__process_sections(struct perf_header *self, int fd, 91int perf_header__process_sections(struct perf_header *self, int fd,
@@ -111,8 +113,7 @@ int event__synthesize_event_types(event__handler_t process,
111int event__process_event_type(event_t *self, 113int event__process_event_type(event_t *self,
112 struct perf_session *session); 114 struct perf_session *session);
113 115
114int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs, 116int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
115 int nb_events,
116 event__handler_t process, 117 event__handler_t process,
117 struct perf_session *session); 118 struct perf_session *session);
118int event__process_tracing_data(event_t *self, 119int event__process_tracing_data(event_t *self,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 2022e8740994..c749ba6136a0 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -356,7 +356,7 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
356 356
357static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, 357static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
358 int depth, int depth_mask, int period, 358 int depth, int depth_mask, int period,
359 u64 total_samples, int hits, 359 u64 total_samples, u64 hits,
360 int left_margin) 360 int left_margin)
361{ 361{
362 int i; 362 int i;
@@ -1092,6 +1092,12 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
1092 FILE *file; 1092 FILE *file;
1093 int err = 0; 1093 int err = 0;
1094 u64 len; 1094 u64 len;
1095 char symfs_filename[PATH_MAX];
1096
1097 if (filename) {
1098 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
1099 symbol_conf.symfs, filename);
1100 }
1095 1101
1096 if (filename == NULL) { 1102 if (filename == NULL) {
1097 if (dso->has_build_id) { 1103 if (dso->has_build_id) {
@@ -1100,9 +1106,9 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
1100 return -ENOMEM; 1106 return -ENOMEM;
1101 } 1107 }
1102 goto fallback; 1108 goto fallback;
1103 } else if (readlink(filename, command, sizeof(command)) < 0 || 1109 } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
1104 strstr(command, "[kernel.kallsyms]") || 1110 strstr(command, "[kernel.kallsyms]") ||
1105 access(filename, R_OK)) { 1111 access(symfs_filename, R_OK)) {
1106 free(filename); 1112 free(filename);
1107fallback: 1113fallback:
1108 /* 1114 /*
@@ -1111,6 +1117,8 @@ fallback:
1111 * DSO is the same as when 'perf record' ran. 1117 * DSO is the same as when 'perf record' ran.
1112 */ 1118 */
1113 filename = dso->long_name; 1119 filename = dso->long_name;
1120 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
1121 symbol_conf.symfs, filename);
1114 free_filename = false; 1122 free_filename = false;
1115 } 1123 }
1116 1124
@@ -1137,7 +1145,7 @@ fallback:
1137 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand", 1145 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
1138 map__rip_2objdump(map, sym->start), 1146 map__rip_2objdump(map, sym->start),
1139 map__rip_2objdump(map, sym->end), 1147 map__rip_2objdump(map, sym->end),
1140 filename, filename); 1148 symfs_filename, filename);
1141 1149
1142 pr_debug("Executing: %s\n", command); 1150 pr_debug("Executing: %s\n", command);
1143 1151
@@ -1168,10 +1176,13 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
1168 size_t ret = 0; 1176 size_t ret = 0;
1169 1177
1170 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { 1178 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
1171 if (!event__name[i]) 1179 const char *name = event__get_event_name(i);
1180
1181 if (!strcmp(name, "UNKNOWN"))
1172 continue; 1182 continue;
1173 ret += fprintf(fp, "%10s events: %10d\n", 1183
1174 event__name[i], self->stats.nr_events[i]); 1184 ret += fprintf(fp, "%16s events: %10d\n", name,
1185 self->stats.nr_events[i]);
1175 } 1186 }
1176 1187
1177 return ret; 1188 return ret;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 587d375d3430..ee789856a8c9 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -52,8 +52,10 @@ struct sym_priv {
52struct events_stats { 52struct events_stats {
53 u64 total_period; 53 u64 total_period;
54 u64 total_lost; 54 u64 total_lost;
55 u64 total_invalid_chains;
55 u32 nr_events[PERF_RECORD_HEADER_MAX]; 56 u32 nr_events[PERF_RECORD_HEADER_MAX];
56 u32 nr_unknown_events; 57 u32 nr_unknown_events;
58 u32 nr_invalid_chains;
57}; 59};
58 60
59enum hist_column { 61enum hist_column {
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 000000000000..acffd5e4d1d4
--- /dev/null
+++ b/tools/perf/util/include/asm/cpufeature.h
@@ -0,0 +1,9 @@
1
2#ifndef PERF_CPUFEATURE_H
3#define PERF_CPUFEATURE_H
4
5/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
6
7#define X86_FEATURE_REP_GOOD 0
8
9#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 000000000000..bb4198e7837a
--- /dev/null
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -0,0 +1,11 @@
1
2#ifndef PERF_DWARF2_H
3#define PERF_DWARF2_H
4
5/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
6
7#define CFI_STARTPROC
8#define CFI_ENDPROC
9
10#endif /* PERF_DWARF2_H */
11
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index bb4ac2e05385..8be0b968ca0b 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr)
13 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); 13 addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
14} 14}
15 15
16static inline void clear_bit(int nr, unsigned long *addr)
17{
18 addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
19}
20
16static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) 21static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
17{ 22{
18 return ((1UL << (nr % BITS_PER_LONG)) & 23 return ((1UL << (nr % BITS_PER_LONG)) &
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 000000000000..06387cffe125
--- /dev/null
+++ b/tools/perf/util/include/linux/linkage.h
@@ -0,0 +1,13 @@
1
2#ifndef PERF_LINUX_LINKAGE_H_
3#define PERF_LINUX_LINKAGE_H_
4
5/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
6
7#define ENTRY(name) \
8 .globl name; \
9 name:
10
11#define ENDPROC(name)
12
13#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4af5bd59cfd1..649083f27e08 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,6 +1,7 @@
1#include "../../../include/linux/hw_breakpoint.h" 1#include "../../../include/linux/hw_breakpoint.h"
2#include "util.h" 2#include "util.h"
3#include "../perf.h" 3#include "../perf.h"
4#include "evsel.h"
4#include "parse-options.h" 5#include "parse-options.h"
5#include "parse-events.h" 6#include "parse-events.h"
6#include "exec_cmd.h" 7#include "exec_cmd.h"
@@ -12,8 +13,7 @@
12 13
13int nr_counters; 14int nr_counters;
14 15
15struct perf_event_attr attrs[MAX_COUNTERS]; 16LIST_HEAD(evsel_list);
16char *filters[MAX_COUNTERS];
17 17
18struct event_symbol { 18struct event_symbol {
19 u8 type; 19 u8 type;
@@ -266,10 +266,10 @@ static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
266 return name; 266 return name;
267} 267}
268 268
269const char *event_name(int counter) 269const char *event_name(struct perf_evsel *evsel)
270{ 270{
271 u64 config = attrs[counter].config; 271 u64 config = evsel->attr.config;
272 int type = attrs[counter].type; 272 int type = evsel->attr.type;
273 273
274 return __event_name(type, config); 274 return __event_name(type, config);
275} 275}
@@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name,
434 id = atoll(id_buf); 434 id = atoll(id_buf);
435 attr->config = id; 435 attr->config = id;
436 attr->type = PERF_TYPE_TRACEPOINT; 436 attr->type = PERF_TYPE_TRACEPOINT;
437 *strp = evt_name + evt_length; 437 *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
438 438
439 attr->sample_type |= PERF_SAMPLE_RAW; 439 attr->sample_type |= PERF_SAMPLE_RAW;
440 attr->sample_type |= PERF_SAMPLE_TIME; 440 attr->sample_type |= PERF_SAMPLE_TIME;
@@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
495 struct perf_event_attr *attr) 495 struct perf_event_attr *attr)
496{ 496{
497 const char *evt_name; 497 const char *evt_name;
498 char *flags; 498 char *flags = NULL, *comma_loc;
499 char sys_name[MAX_EVENT_LENGTH]; 499 char sys_name[MAX_EVENT_LENGTH];
500 unsigned int sys_length, evt_length; 500 unsigned int sys_length, evt_length;
501 501
@@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp,
514 sys_name[sys_length] = '\0'; 514 sys_name[sys_length] = '\0';
515 evt_name = evt_name + 1; 515 evt_name = evt_name + 1;
516 516
517 comma_loc = strchr(evt_name, ',');
518 if (comma_loc) {
519 /* take the event name up to the comma */
520 evt_name = strndup(evt_name, comma_loc - evt_name);
521 }
517 flags = strchr(evt_name, ':'); 522 flags = strchr(evt_name, ':');
518 if (flags) { 523 if (flags) {
519 /* split it out: */ 524 /* split it out: */
@@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp,
524 evt_length = strlen(evt_name); 529 evt_length = strlen(evt_name);
525 if (evt_length >= MAX_EVENT_LENGTH) 530 if (evt_length >= MAX_EVENT_LENGTH)
526 return EVT_FAILED; 531 return EVT_FAILED;
527
528 if (strpbrk(evt_name, "*?")) { 532 if (strpbrk(evt_name, "*?")) {
529 *strp = evt_name + evt_length; 533 *strp += strlen(sys_name) + evt_length;
530 return parse_multiple_tracepoint_event(sys_name, evt_name, 534 return parse_multiple_tracepoint_event(sys_name, evt_name,
531 flags); 535 flags);
532 } else 536 } else
@@ -810,9 +814,6 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
810 return -1; 814 return -1;
811 815
812 for (;;) { 816 for (;;) {
813 if (nr_counters == MAX_COUNTERS)
814 return -1;
815
816 memset(&attr, 0, sizeof(attr)); 817 memset(&attr, 0, sizeof(attr));
817 ret = parse_event_symbols(&str, &attr); 818 ret = parse_event_symbols(&str, &attr);
818 if (ret == EVT_FAILED) 819 if (ret == EVT_FAILED)
@@ -822,8 +823,13 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
822 return -1; 823 return -1;
823 824
824 if (ret != EVT_HANDLED_ALL) { 825 if (ret != EVT_HANDLED_ALL) {
825 attrs[nr_counters] = attr; 826 struct perf_evsel *evsel;
826 nr_counters++; 827 evsel = perf_evsel__new(attr.type, attr.config,
828 nr_counters);
829 if (evsel == NULL)
830 return -1;
831 list_add_tail(&evsel->node, &evsel_list);
832 ++nr_counters;
827 } 833 }
828 834
829 if (*str == 0) 835 if (*str == 0)
@@ -840,21 +846,22 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
840int parse_filter(const struct option *opt __used, const char *str, 846int parse_filter(const struct option *opt __used, const char *str,
841 int unset __used) 847 int unset __used)
842{ 848{
843 int i = nr_counters - 1; 849 struct perf_evsel *last = NULL;
844 int len = strlen(str);
845 850
846 if (i < 0 || attrs[i].type != PERF_TYPE_TRACEPOINT) { 851 if (!list_empty(&evsel_list))
852 last = list_entry(evsel_list.prev, struct perf_evsel, node);
853
854 if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
847 fprintf(stderr, 855 fprintf(stderr,
848 "-F option should follow a -e tracepoint option\n"); 856 "-F option should follow a -e tracepoint option\n");
849 return -1; 857 return -1;
850 } 858 }
851 859
852 filters[i] = malloc(len + 1); 860 last->filter = strdup(str);
853 if (!filters[i]) { 861 if (last->filter == NULL) {
854 fprintf(stderr, "not enough memory to hold filter string\n"); 862 fprintf(stderr, "not enough memory to hold filter string\n");
855 return -1; 863 return -1;
856 } 864 }
857 strcpy(filters[i], str);
858 865
859 return 0; 866 return 0;
860} 867}
@@ -906,6 +913,47 @@ static void print_tracepoint_events(void)
906} 913}
907 914
908/* 915/*
916 * Check whether event is in <debugfs_mount_point>/tracing/events
917 */
918
919int is_valid_tracepoint(const char *event_string)
920{
921 DIR *sys_dir, *evt_dir;
922 struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
923 char evt_path[MAXPATHLEN];
924 char dir_path[MAXPATHLEN];
925
926 if (debugfs_valid_mountpoint(debugfs_path))
927 return 0;
928
929 sys_dir = opendir(debugfs_path);
930 if (!sys_dir)
931 return 0;
932
933 for_each_subsystem(sys_dir, sys_dirent, sys_next) {
934
935 snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
936 sys_dirent.d_name);
937 evt_dir = opendir(dir_path);
938 if (!evt_dir)
939 continue;
940
941 for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
942 snprintf(evt_path, MAXPATHLEN, "%s:%s",
943 sys_dirent.d_name, evt_dirent.d_name);
944 if (!strcmp(evt_path, event_string)) {
945 closedir(evt_dir);
946 closedir(sys_dir);
947 return 1;
948 }
949 }
950 closedir(evt_dir);
951 }
952 closedir(sys_dir);
953 return 0;
954}
955
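
A usage sketch for the new helper; the tracepoint name is only an example:

	/* Sketch (not in this patch): reject an unknown tracepoint early. */
	if (!is_valid_tracepoint("sched:sched_switch"))
		fprintf(stderr, "invalid or unavailable tracepoint\n");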
956/*
909 * Print the help text for the event symbols: 957 * Print the help text for the event symbols:
910 */ 958 */
911void print_events(void) 959void print_events(void)
@@ -963,3 +1011,26 @@ void print_events(void)
963 1011
964 exit(129); 1012 exit(129);
965} 1013}
1014
1015int perf_evsel_list__create_default(void)
1016{
1017 struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
1018 PERF_COUNT_HW_CPU_CYCLES, 0);
1019 if (evsel == NULL)
1020 return -ENOMEM;
1021
1022 list_add(&evsel->node, &evsel_list);
1023 ++nr_counters;
1024 return 0;
1025}
1026
1027void perf_evsel_list__delete(void)
1028{
1029 struct perf_evsel *pos, *n;
1030
1031 list_for_each_entry_safe(pos, n, &evsel_list, node) {
1032 list_del_init(&pos->node);
1033 perf_evsel__delete(pos);
1034 }
1035 nr_counters = 0;
1036}
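
Builtin commands now consume the dynamic evsel_list instead of indexing a fixed attrs[MAX_COUNTERS] array. A sketch of the intended call pattern; setup_events() is an illustrative name, not part of this patch:

	/* Sketch (not in this patch) of the intended call pattern. */
	static int setup_events(void)
	{
		struct perf_evsel *pos;

		/* Fall back to a single cycles event when no -e was given. */
		if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0)
			return -ENOMEM;

		list_for_each_entry(pos, &evsel_list, node)
			pr_debug("event %d: %s\n", pos->idx, event_name(pos));

		return 0;
	}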
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index fc4ab3fe877a..b82cafb83772 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -4,6 +4,16 @@
4 * Parse symbolic events/counts passed in as options: 4 * Parse symbolic events/counts passed in as options:
5 */ 5 */
6 6
7#include "../../../include/linux/perf_event.h"
8
9struct list_head;
10struct perf_evsel;
11
12extern struct list_head evsel_list;
13
14int perf_evsel_list__create_default(void);
15void perf_evsel_list__delete(void);
16
7struct option; 17struct option;
8 18
9struct tracepoint_path { 19struct tracepoint_path {
@@ -13,14 +23,11 @@ struct tracepoint_path {
13}; 23};
14 24
15extern struct tracepoint_path *tracepoint_id_to_path(u64 config); 25extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
16extern bool have_tracepoints(struct perf_event_attr *pattrs, int nb_events); 26extern bool have_tracepoints(struct list_head *evsel_list);
17 27
18extern int nr_counters; 28extern int nr_counters;
19 29
20extern struct perf_event_attr attrs[MAX_COUNTERS]; 30const char *event_name(struct perf_evsel *event);
21extern char *filters[MAX_COUNTERS];
22
23extern const char *event_name(int ctr);
24extern const char *__event_name(int type, u64 config); 31extern const char *__event_name(int type, u64 config);
25 32
26extern int parse_events(const struct option *opt, const char *str, int unset); 33extern int parse_events(const struct option *opt, const char *str, int unset);
@@ -29,9 +36,9 @@ extern int parse_filter(const struct option *opt, const char *str, int unset);
29#define EVENTS_HELP_MAX (128*1024) 36#define EVENTS_HELP_MAX (128*1024)
30 37
31extern void print_events(void); 38extern void print_events(void);
39extern int is_valid_tracepoint(const char *event_string);
32 40
33extern char debugfs_path[]; 41extern char debugfs_path[];
34extern int valid_debugfs_mount(const char *debugfs); 42extern int valid_debugfs_mount(const char *debugfs);
35 43
36
37#endif /* __PERF_PARSE_EVENTS_H */ 44#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index c7d72dce54b2..abc31a1dac1a 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -119,6 +119,10 @@ struct option {
119 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } 119 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
120#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ 120#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
121 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } 121 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
122#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
123 { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
124 .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
125 .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
122 126
123/* parse_options() will filter out the processed options and leave the 127/* parse_options() will filter out the processed options and leave the
124 * non-option arguments in argv[]. 128
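
The new NOOPT variant builds an option that accepts no argument on the command line yet still hands its callback a default value. A hypothetical option-table entry, with all names illustrative and not from this patch:

	/* Hypothetical entry (not in this patch): -d takes no argument on
	 * the command line, but dump_callback still receives "all". */
	OPT_CALLBACK_DEFAULT_NOOPT('d', "dump", &opts, NULL,
				   "dump everything", dump_callback, "all"),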
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 3b6a5297bf16..128aaab0aeda 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -95,7 +95,7 @@ static int init_vmlinux(void)
95 goto out; 95 goto out;
96 96
97 if (machine__create_kernel_maps(&machine) < 0) { 97 if (machine__create_kernel_maps(&machine) < 0) {
98 pr_debug("machine__create_kernel_maps "); 98 pr_debug("machine__create_kernel_maps() failed.\n");
99 goto out; 99 goto out;
100 } 100 }
101out: 101out:
@@ -114,6 +114,8 @@ static struct symbol *__find_kernel_function_by_name(const char *name,
114const char *kernel_get_module_path(const char *module) 114const char *kernel_get_module_path(const char *module)
115{ 115{
116 struct dso *dso; 116 struct dso *dso;
117 struct map *map;
118 const char *vmlinux_name;
117 119
118 if (module) { 120 if (module) {
119 list_for_each_entry(dso, &machine.kernel_dsos, node) { 121 list_for_each_entry(dso, &machine.kernel_dsos, node) {
@@ -123,10 +125,17 @@ const char *kernel_get_module_path(const char *module)
123 } 125 }
124 pr_debug("Failed to find module %s.\n", module); 126 pr_debug("Failed to find module %s.\n", module);
125 return NULL; 127 return NULL;
128 }
129
130 map = machine.vmlinux_maps[MAP__FUNCTION];
131 dso = map->dso;
132
133 vmlinux_name = symbol_conf.vmlinux_name;
134 if (vmlinux_name) {
135 if (dso__load_vmlinux(dso, map, vmlinux_name, NULL) <= 0)
136 return NULL;
126 } else { 137 } else {
127 dso = machine.vmlinux_maps[MAP__FUNCTION]->dso; 138 if (dso__load_vmlinux_path(dso, map, NULL) <= 0) {
128 if (dso__load_vmlinux_path(dso,
129 machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
130 pr_debug("Failed to load kernel map.\n"); 139 pr_debug("Failed to load kernel map.\n");
131 return NULL; 140 return NULL;
132 } 141 }
@@ -140,7 +149,8 @@ static int open_vmlinux(const char *module)
140{ 149{
141 const char *path = kernel_get_module_path(module); 150 const char *path = kernel_get_module_path(module);
142 if (!path) { 151 if (!path) {
143 pr_err("Failed to find path of %s module", module ?: "kernel"); 152 pr_err("Failed to find path of %s module.\n",
153 module ?: "kernel");
144 return -ENOENT; 154 return -ENOENT;
145 } 155 }
146 pr_debug("Try to open %s\n", path); 156 pr_debug("Try to open %s\n", path);
@@ -217,7 +227,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
217 pr_warning("Warning: No dwarf info found in the vmlinux - " 227 pr_warning("Warning: No dwarf info found in the vmlinux - "
218 "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); 228 "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
219 if (!need_dwarf) { 229 if (!need_dwarf) {
220 pr_debug("Trying to use symbols.\nn"); 230 pr_debug("Trying to use symbols.\n");
221 return 0; 231 return 0;
222 } 232 }
223 } 233 }
@@ -286,42 +296,49 @@ static int get_real_path(const char *raw_path, const char *comp_dir,
286#define LINEBUF_SIZE 256 296#define LINEBUF_SIZE 256
287#define NR_ADDITIONAL_LINES 2 297#define NR_ADDITIONAL_LINES 2
288 298
289static int show_one_line(FILE *fp, int l, bool skip, bool show_num) 299static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
290{ 300{
291 char buf[LINEBUF_SIZE]; 301 char buf[LINEBUF_SIZE];
292 const char *color = PERF_COLOR_BLUE; 302 const char *color = show_num ? "" : PERF_COLOR_BLUE;
293 303 const char *prefix = NULL;
294 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
295 goto error;
296 if (!skip) {
297 if (show_num)
298 fprintf(stdout, "%7d %s", l, buf);
299 else
300 color_fprintf(stdout, color, " %s", buf);
301 }
302 304
303 while (strlen(buf) == LINEBUF_SIZE - 1 && 305 do {
304 buf[LINEBUF_SIZE - 2] != '\n') {
305 if (fgets(buf, LINEBUF_SIZE, fp) == NULL) 306 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
306 goto error; 307 goto error;
307 if (!skip) { 308 if (skip)
308 if (show_num) 309 continue;
309 fprintf(stdout, "%s", buf); 310 if (!prefix) {
310 else 311 prefix = show_num ? "%7d " : " ";
311 color_fprintf(stdout, color, "%s", buf); 312 color_fprintf(stdout, color, prefix, l);
312 } 313 }
313 } 314 color_fprintf(stdout, color, "%s", buf);
314 315
315 return 0; 316 } while (strchr(buf, '\n') == NULL);
317
318 return 1;
316error: 319error:
317 if (feof(fp)) 320 if (ferror(fp)) {
318 pr_warning("Source file is shorter than expected.\n");
319 else
320 pr_warning("File read error: %s\n", strerror(errno)); 321 pr_warning("File read error: %s\n", strerror(errno));
322 return -1;
323 }
324 return 0;
325}
321 326
322 return -1; 327static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
328{
329 int rv = __show_one_line(fp, l, skip, show_num);
330 if (rv == 0) {
331 pr_warning("Source file is shorter than expected.\n");
332 rv = -1;
333 }
334 return rv;
323} 335}
324 336
337#define show_one_line_with_num(f,l) _show_one_line(f,l,false,true)
338#define show_one_line(f,l) _show_one_line(f,l,false,false)
339#define skip_one_line(f,l) _show_one_line(f,l,true,false)
340#define show_one_line_or_eof(f,l) __show_one_line(f,l,false,false)
341
325/* 342/*
326 * Show line-range always requires debuginfo to find source file and 343 * Show line-range always requires debuginfo to find source file and
327 * line number. 344 * line number.
@@ -370,7 +387,7 @@ int show_line_range(struct line_range *lr, const char *module)
370 fprintf(stdout, "<%s:%d>\n", lr->function, 387 fprintf(stdout, "<%s:%d>\n", lr->function,
371 lr->start - lr->offset); 388 lr->start - lr->offset);
372 else 389 else
373 fprintf(stdout, "<%s:%d>\n", lr->file, lr->start); 390 fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
374 391
375 fp = fopen(lr->path, "r"); 392 fp = fopen(lr->path, "r");
376 if (fp == NULL) { 393 if (fp == NULL) {
@@ -379,26 +396,30 @@ int show_line_range(struct line_range *lr, const char *module)
379 return -errno; 396 return -errno;
380 } 397 }
381 /* Skip to starting line number */ 398 /* Skip to starting line number */
382 while (l < lr->start && ret >= 0) 399 while (l < lr->start) {
383 ret = show_one_line(fp, l++, true, false); 400 ret = skip_one_line(fp, l++);
384 if (ret < 0) 401 if (ret < 0)
385 goto end; 402 goto end;
403 }
386 404
387 list_for_each_entry(ln, &lr->line_list, list) { 405 list_for_each_entry(ln, &lr->line_list, list) {
388 while (ln->line > l && ret >= 0) 406 for (; ln->line > l; l++) {
389 ret = show_one_line(fp, (l++) - lr->offset, 407 ret = show_one_line(fp, l - lr->offset);
390 false, false); 408 if (ret < 0)
391 if (ret >= 0) 409 goto end;
392 ret = show_one_line(fp, (l++) - lr->offset, 410 }
393 false, true); 411 ret = show_one_line_with_num(fp, l++ - lr->offset);
394 if (ret < 0) 412 if (ret < 0)
395 goto end; 413 goto end;
396 } 414 }
397 415
398 if (lr->end == INT_MAX) 416 if (lr->end == INT_MAX)
399 lr->end = l + NR_ADDITIONAL_LINES; 417 lr->end = l + NR_ADDITIONAL_LINES;
400 while (l <= lr->end && !feof(fp) && ret >= 0) 418 while (l <= lr->end) {
401 ret = show_one_line(fp, (l++) - lr->offset, false, false); 419 ret = show_one_line_or_eof(fp, l++ - lr->offset);
420 if (ret <= 0)
421 break;
422 }
402end: 423end:
403 fclose(fp); 424 fclose(fp);
404 return ret; 425 return ret;
@@ -457,7 +478,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
457 478
458 fd = open_vmlinux(module); 479 fd = open_vmlinux(module);
459 if (fd < 0) { 480 if (fd < 0) {
460 pr_warning("Failed to open debuginfo file.\n"); 481 pr_warning("Failed to open debug information file.\n");
461 return fd; 482 return fd;
462 } 483 }
463 484
@@ -517,56 +538,87 @@ int show_available_vars(struct perf_probe_event *pevs __unused,
517} 538}
518#endif 539#endif
519 540
541static int parse_line_num(char **ptr, int *val, const char *what)
542{
543 const char *start = *ptr;
544
545 errno = 0;
546 *val = strtol(*ptr, ptr, 0);
547 if (errno || *ptr == start) {
548 semantic_error("'%s' is not a valid number.\n", what);
549 return -EINVAL;
550 }
551 return 0;
552}
553
554/*
555 * Stuff 'lr' according to the line range described by 'arg'.
556 * The line range syntax is described by:
557 *
558 * SRC[:SLN[+NUM|-ELN]]
559 * FNC[:SLN[+NUM|-ELN]]
560 */
520int parse_line_range_desc(const char *arg, struct line_range *lr) 561int parse_line_range_desc(const char *arg, struct line_range *lr)
521{ 562{
522 const char *ptr; 563 char *range, *name = strdup(arg);
523 char *tmp; 564 int err;
524 /* 565
525 * <Syntax> 566 if (!name)
526 * SRC:SLN[+NUM|-ELN] 567 return -ENOMEM;
527 * FUNC[:SLN[+NUM|-ELN]] 568
528 */ 569 lr->start = 0;
529 ptr = strchr(arg, ':'); 570 lr->end = INT_MAX;
530 if (ptr) { 571
531 lr->start = (int)strtoul(ptr + 1, &tmp, 0); 572 range = strchr(name, ':');
532 if (*tmp == '+') { 573 if (range) {
533 lr->end = lr->start + (int)strtoul(tmp + 1, &tmp, 0); 574 *range++ = '\0';
534 lr->end--; /* 575
535 * Adjust the number of lines here. 576 err = parse_line_num(&range, &lr->start, "start line");
536 * If the number of lines == 1, the 577 if (err)
537 * the end of line should be equal to 578 goto err;
538 * the start of line. 579
539 */ 580 if (*range == '+' || *range == '-') {
540 } else if (*tmp == '-') 581 const char c = *range++;
541 lr->end = (int)strtoul(tmp + 1, &tmp, 0); 582
542 else 583 err = parse_line_num(&range, &lr->end, "end line");
543 lr->end = INT_MAX; 584 if (err)
585 goto err;
586
587 if (c == '+') {
588 lr->end += lr->start;
589 /*
590 * Adjust the number of lines here.
591 * If the number of lines == 1,
592 * the end of line should be equal to
593 * the start of line.
594 */
595 lr->end--;
596 }
597 }
598
544 pr_debug("Line range is %d to %d\n", lr->start, lr->end); 599 pr_debug("Line range is %d to %d\n", lr->start, lr->end);
600
601 err = -EINVAL;
545 if (lr->start > lr->end) { 602 if (lr->start > lr->end) {
546 semantic_error("Start line must be smaller" 603 semantic_error("Start line must be smaller"
547 " than end line.\n"); 604 " than end line.\n");
548 return -EINVAL; 605 goto err;
549 } 606 }
550 if (*tmp != '\0') { 607 if (*range != '\0') {
551 semantic_error("Tailing with invalid character '%d'.\n", 608 semantic_error("Tailing with invalid str '%s'.\n", range);
552 *tmp); 609 goto err;
553 return -EINVAL;
554 } 610 }
555 tmp = strndup(arg, (ptr - arg));
556 } else {
557 tmp = strdup(arg);
558 lr->end = INT_MAX;
559 } 611 }
560 612
561 if (tmp == NULL) 613 if (strchr(name, '.'))
562 return -ENOMEM; 614 lr->file = name;
563
564 if (strchr(tmp, '.'))
565 lr->file = tmp;
566 else 615 else
567 lr->function = tmp; 616 lr->function = name;
568 617
569 return 0; 618 return 0;
619err:
620 free(name);
621 return err;
570} 622}
571 623
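
A worked example of the SRC[:SLN[+NUM|-ELN]] syntax handled above; note that +NUM counts lines, so the end is pulled back by one. This is a sketch, not part of the patch:

	#include <assert.h>
	#include <limits.h>
	#include <string.h>

	/* Sketch (not in this patch): "schedule:12+3" means three lines
	 * starting at 12, i.e. the inclusive range [12, 14]. */
	static void line_range_example(void)
	{
		struct line_range lr = { .start = 0 };

		if (parse_line_range_desc("schedule:12+3", &lr) == 0)
			assert(lr.start == 12 && lr.end == 14 &&
			       strcmp(lr.function, "schedule") == 0);
	}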
572/* Check the name is good for event/group */ 624/* Check the name is good for event/group */
@@ -690,39 +742,40 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
690 742
691 /* Exclusion check */ 743 /* Exclusion check */
692 if (pp->lazy_line && pp->line) { 744 if (pp->lazy_line && pp->line) {
693 semantic_error("Lazy pattern can't be used with line number."); 745 semantic_error("Lazy pattern can't be used with"
746 " line number.\n");
694 return -EINVAL; 747 return -EINVAL;
695 } 748 }
696 749
697 if (pp->lazy_line && pp->offset) { 750 if (pp->lazy_line && pp->offset) {
698 semantic_error("Lazy pattern can't be used with offset."); 751 semantic_error("Lazy pattern can't be used with offset.\n");
699 return -EINVAL; 752 return -EINVAL;
700 } 753 }
701 754
702 if (pp->line && pp->offset) { 755 if (pp->line && pp->offset) {
703 semantic_error("Offset can't be used with line number."); 756 semantic_error("Offset can't be used with line number.\n");
704 return -EINVAL; 757 return -EINVAL;
705 } 758 }
706 759
707 if (!pp->line && !pp->lazy_line && pp->file && !pp->function) { 760 if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
708 semantic_error("File always requires line number or " 761 semantic_error("File always requires line number or "
709 "lazy pattern."); 762 "lazy pattern.\n");
710 return -EINVAL; 763 return -EINVAL;
711 } 764 }
712 765
713 if (pp->offset && !pp->function) { 766 if (pp->offset && !pp->function) {
714 semantic_error("Offset requires an entry function."); 767 semantic_error("Offset requires an entry function.\n");
715 return -EINVAL; 768 return -EINVAL;
716 } 769 }
717 770
718 if (pp->retprobe && !pp->function) { 771 if (pp->retprobe && !pp->function) {
719 semantic_error("Return probe requires an entry function."); 772 semantic_error("Return probe requires an entry function.\n");
720 return -EINVAL; 773 return -EINVAL;
721 } 774 }
722 775
723 if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) { 776 if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
724 semantic_error("Offset/Line/Lazy pattern can't be used with " 777 semantic_error("Offset/Line/Lazy pattern can't be used with "
725 "return probe."); 778 "return probe.\n");
726 return -EINVAL; 779 return -EINVAL;
727 } 780 }
728 781
@@ -996,7 +1049,7 @@ int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
996 1049
997 return tmp - buf; 1050 return tmp - buf;
998error: 1051error:
999 pr_debug("Failed to synthesize perf probe argument: %s", 1052 pr_debug("Failed to synthesize perf probe argument: %s\n",
1000 strerror(-ret)); 1053 strerror(-ret));
1001 return ret; 1054 return ret;
1002} 1055}
@@ -1024,13 +1077,13 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
1024 goto error; 1077 goto error;
1025 } 1078 }
1026 if (pp->file) { 1079 if (pp->file) {
1027 len = strlen(pp->file) - 31; 1080 tmp = pp->file;
1028 if (len < 0) 1081 len = strlen(tmp);
1029 len = 0; 1082 if (len > 30) {
1030 tmp = strchr(pp->file + len, '/'); 1083 tmp = strchr(pp->file + len - 30, '/');
1031 if (!tmp) 1084 tmp = tmp ? tmp + 1 : pp->file + len - 30;
1032 tmp = pp->file + len; 1085 }
1033 ret = e_snprintf(file, 32, "@%s", tmp + 1); 1086 ret = e_snprintf(file, 32, "@%s", tmp);
1034 if (ret <= 0) 1087 if (ret <= 0)
1035 goto error; 1088 goto error;
1036 } 1089 }
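
[Editor's note] The replacement logic above shortens long file paths for the 32-byte "@file" tag by keeping at most the last 30 bytes and preferring to cut at a '/'. A standalone sketch of that truncation, with hypothetical names:

#include <stdio.h>
#include <string.h>

/* Shorten a path to at most 30 trailing bytes for the "@file" tag,
 * preferring to cut at a '/' so the tail remains a readable suffix. */
static void file_tag_sketch(const char *file, char *buf, size_t bufsz)
{
        size_t len = strlen(file);
        const char *tmp = file;

        if (len > 30) {
                tmp = strchr(file + len - 30, '/');
                tmp = tmp ? tmp + 1 : file + len - 30;
        }
        snprintf(buf, bufsz, "@%s", tmp);
}

Unlike the old code, this never skips the first byte of a short path, which is the off-by-one the hunk fixes.
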
@@ -1046,7 +1099,7 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
1046 1099
1047 return buf; 1100 return buf;
1048error: 1101error:
1049 pr_debug("Failed to synthesize perf probe point: %s", 1102 pr_debug("Failed to synthesize perf probe point: %s\n",
1050 strerror(-ret)); 1103 strerror(-ret));
1051 if (buf) 1104 if (buf)
1052 free(buf); 1105 free(buf);
@@ -1787,7 +1840,7 @@ static int del_trace_probe_event(int fd, const char *group,
1787 1840
1788 ret = e_snprintf(buf, 128, "%s:%s", group, event); 1841 ret = e_snprintf(buf, 128, "%s:%s", group, event);
1789 if (ret < 0) { 1842 if (ret < 0) {
1790 pr_err("Failed to copy event."); 1843 pr_err("Failed to copy event.\n");
1791 return ret; 1844 return ret;
1792 } 1845 }
1793 1846
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 3991d73d1cff..ab83b6ac5d65 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -117,28 +117,6 @@ static void line_list__free(struct list_head *head)
117} 117}
118 118
119/* Dwarf FL wrappers */ 119/* Dwarf FL wrappers */
120
121static int __linux_kernel_find_elf(Dwfl_Module *mod,
122 void **userdata,
123 const char *module_name,
124 Dwarf_Addr base,
125 char **file_name, Elf **elfp)
126{
127 int fd;
128 const char *path = kernel_get_module_path(module_name);
129
130 if (path) {
131 fd = open(path, O_RDONLY);
132 if (fd >= 0) {
133 *file_name = strdup(path);
134 return fd;
135 }
136 }
137 /* If failed, try to call standard method */
138 return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
139 file_name, elfp);
140}
141
142static char *debuginfo_path; /* Currently dummy */ 120static char *debuginfo_path; /* Currently dummy */
143 121
144static const Dwfl_Callbacks offline_callbacks = { 122static const Dwfl_Callbacks offline_callbacks = {
@@ -151,14 +129,6 @@ static const Dwfl_Callbacks offline_callbacks = {
151 .find_elf = dwfl_build_id_find_elf, 129 .find_elf = dwfl_build_id_find_elf,
152}; 130};
153 131
154static const Dwfl_Callbacks kernel_callbacks = {
155 .find_debuginfo = dwfl_standard_find_debuginfo,
156 .debuginfo_path = &debuginfo_path,
157
158 .find_elf = __linux_kernel_find_elf,
159 .section_address = dwfl_linux_kernel_module_section_address,
160};
161
162/* Get a Dwarf from offline image */ 132/* Get a Dwarf from offline image */
163static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias) 133static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias)
164{ 134{
@@ -185,6 +155,38 @@ error:
185 return dbg; 155 return dbg;
186} 156}
187 157
158#if _ELFUTILS_PREREQ(0, 148)
159/* This method is buggy if elfutils is older than 0.148 */
160static int __linux_kernel_find_elf(Dwfl_Module *mod,
161 void **userdata,
162 const char *module_name,
163 Dwarf_Addr base,
164 char **file_name, Elf **elfp)
165{
166 int fd;
167 const char *path = kernel_get_module_path(module_name);
168
169 pr_debug2("Use file %s for %s\n", path, module_name);
170 if (path) {
171 fd = open(path, O_RDONLY);
172 if (fd >= 0) {
173 *file_name = strdup(path);
174 return fd;
175 }
176 }
177 /* If failed, try to call standard method */
178 return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
179 file_name, elfp);
180}
181
182static const Dwfl_Callbacks kernel_callbacks = {
183 .find_debuginfo = dwfl_standard_find_debuginfo,
184 .debuginfo_path = &debuginfo_path,
185
186 .find_elf = __linux_kernel_find_elf,
187 .section_address = dwfl_linux_kernel_module_section_address,
188};
189
188/* Get a Dwarf from live kernel image */ 190/* Get a Dwarf from live kernel image */
189static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp, 191static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
190 Dwarf_Addr *bias) 192 Dwarf_Addr *bias)
@@ -205,11 +207,34 @@ static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
205 dbg = dwfl_addrdwarf(*dwflp, addr, bias); 207 dbg = dwfl_addrdwarf(*dwflp, addr, bias);
206 /* Here, check whether we could get a real dwarf */ 208 /* Here, check whether we could get a real dwarf */
207 if (!dbg) { 209 if (!dbg) {
210 pr_debug("Failed to find kernel dwarf at %lx\n",
211 (unsigned long)addr);
208 dwfl_end(*dwflp); 212 dwfl_end(*dwflp);
209 *dwflp = NULL; 213 *dwflp = NULL;
210 } 214 }
211 return dbg; 215 return dbg;
212} 216}
217#else
218/* With older elfutils, this only supports the main kernel image... */
219static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr __used, Dwfl **dwflp,
220 Dwarf_Addr *bias)
221{
222 int fd;
223 const char *path = kernel_get_module_path("kernel");
224
225 if (!path) {
226 pr_err("Failed to find vmlinux path\n");
227 return NULL;
228 }
229
230 pr_debug2("Use file %s for debuginfo\n", path);
231 fd = open(path, O_RDONLY);
232 if (fd < 0)
233 return NULL;
234
235 return dwfl_init_offline_dwarf(fd, dwflp, bias);
236}
237#endif
213 238
214/* Dwarf wrappers */ 239/* Dwarf wrappers */
215 240
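
[Editor's note] __linux_kernel_find_elf above follows a "try a known path first, then fall back to the library's own search" shape. A minimal sketch of that pattern; resolve_path() stands in for kernel_get_module_path() and standard_finder() for dwfl_linux_kernel_find_elf(), both hypothetical here:

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>

static int open_with_fallback(const char *module_name, char **file_name,
                              const char *(*resolve_path)(const char *),
                              int (*standard_finder)(const char *))
{
        const char *path = resolve_path(module_name);

        if (path) {
                int fd = open(path, O_RDONLY);

                if (fd >= 0) {
                        *file_name = strdup(path);      /* caller frees */
                        return fd;
                }
        }
        /* the fast path failed; let the library search on its own */
        return standard_finder(module_name);
}
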
@@ -627,8 +652,8 @@ static_var:
627 regs = get_arch_regstr(regn); 652 regs = get_arch_regstr(regn);
628 if (!regs) { 653 if (!regs) {
629 /* This should be a bug in DWARF or this tool */ 654 /* This should be a bug in DWARF or this tool */
630 pr_warning("Mapping for DWARF register number %u " 655 pr_warning("Mapping for the register number %u "
631 "missing on this architecture.", regn); 656 "missing on this architecture.\n", regn);
632 return -ERANGE; 657 return -ERANGE;
633 } 658 }
634 659
@@ -674,13 +699,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
674 if (ret != DW_TAG_pointer_type && 699 if (ret != DW_TAG_pointer_type &&
675 ret != DW_TAG_array_type) { 700 ret != DW_TAG_array_type) {
676 pr_warning("Failed to cast into string: " 701 pr_warning("Failed to cast into string: "
677 "%s(%s) is not a pointer nor array.", 702 "%s(%s) is not a pointer nor array.\n",
678 dwarf_diename(vr_die), dwarf_diename(&type)); 703 dwarf_diename(vr_die), dwarf_diename(&type));
679 return -EINVAL; 704 return -EINVAL;
680 } 705 }
681 if (ret == DW_TAG_pointer_type) { 706 if (ret == DW_TAG_pointer_type) {
682 if (die_get_real_type(&type, &type) == NULL) { 707 if (die_get_real_type(&type, &type) == NULL) {
683 pr_warning("Failed to get a type information."); 708 pr_warning("Failed to get type"
709 " information.\n");
684 return -ENOENT; 710 return -ENOENT;
685 } 711 }
686 while (*ref_ptr) 712 while (*ref_ptr)
@@ -695,7 +721,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
695 if (!die_compare_name(&type, "char") && 721 if (!die_compare_name(&type, "char") &&
696 !die_compare_name(&type, "unsigned char")) { 722 !die_compare_name(&type, "unsigned char")) {
697 pr_warning("Failed to cast into string: " 723 pr_warning("Failed to cast into string: "
698 "%s is not (unsigned) char *.", 724 "%s is not (unsigned) char *.\n",
699 dwarf_diename(vr_die)); 725 dwarf_diename(vr_die));
700 return -EINVAL; 726 return -EINVAL;
701 } 727 }
@@ -805,8 +831,8 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
805 return -EINVAL; 831 return -EINVAL;
806 } 832 }
807 if (field->name[0] == '[') { 833 if (field->name[0] == '[') {
808 pr_err("Semantic error: %s is not a pointor nor array.", 834 pr_err("Semantic error: %s is not a pointer"
809 varname); 835 " nor array.\n", varname);
810 return -EINVAL; 836 return -EINVAL;
811 } 837 }
812 if (field->ref) { 838 if (field->ref) {
@@ -953,7 +979,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
953 name = dwarf_diename(sp_die); 979 name = dwarf_diename(sp_die);
954 if (name) { 980 if (name) {
955 if (dwarf_entrypc(sp_die, &eaddr) != 0) { 981 if (dwarf_entrypc(sp_die, &eaddr) != 0) {
956 pr_warning("Failed to get entry pc of %s\n", 982 pr_warning("Failed to get entry address of %s\n",
957 dwarf_diename(sp_die)); 983 dwarf_diename(sp_die));
958 return -ENOENT; 984 return -ENOENT;
959 } 985 }
@@ -969,7 +995,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
969 if (retprobe) { 995 if (retprobe) {
970 if (eaddr != paddr) { 996 if (eaddr != paddr) {
971 pr_warning("Return probe must be on the head of" 997 pr_warning("Return probe must be on the head of"
972 " a real function\n"); 998 " a real function.\n");
973 return -EINVAL; 999 return -EINVAL;
974 } 1000 }
975 tp->retprobe = true; 1001 tp->retprobe = true;
@@ -1008,7 +1034,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
1008 Dwarf_Frame *frame; 1034 Dwarf_Frame *frame;
1009 if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 || 1035 if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
1010 dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) { 1036 dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
1011 pr_warning("Failed to get CFA on 0x%jx\n", 1037 pr_warning("Failed to get call frame on 0x%jx\n",
1012 (uintmax_t)pf->addr); 1038 (uintmax_t)pf->addr);
1013 return -ENOENT; 1039 return -ENOENT;
1014 } 1040 }
@@ -1035,7 +1061,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)
1035 int ret = 0; 1061 int ret = 0;
1036 1062
1037 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { 1063 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
1038 pr_warning("No source lines found in this CU.\n"); 1064 pr_warning("No source lines found.\n");
1039 return -ENOENT; 1065 return -ENOENT;
1040 } 1066 }
1041 1067
@@ -1137,7 +1163,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
1137 } 1163 }
1138 1164
1139 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { 1165 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
1140 pr_warning("No source lines found in this CU.\n"); 1166 pr_warning("No source lines found.\n");
1141 return -ENOENT; 1167 return -ENOENT;
1142 } 1168 }
1143 1169
@@ -1195,7 +1221,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
1195 else { 1221 else {
1196 /* Get probe address */ 1222 /* Get probe address */
1197 if (dwarf_entrypc(in_die, &addr) != 0) { 1223 if (dwarf_entrypc(in_die, &addr) != 0) {
1198 pr_warning("Failed to get entry pc of %s.\n", 1224 pr_warning("Failed to get entry address of %s.\n",
1199 dwarf_diename(in_die)); 1225 dwarf_diename(in_die));
1200 param->retval = -ENOENT; 1226 param->retval = -ENOENT;
1201 return DWARF_CB_ABORT; 1227 return DWARF_CB_ABORT;
@@ -1236,8 +1262,8 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
1236 param->retval = find_probe_point_lazy(sp_die, pf); 1262 param->retval = find_probe_point_lazy(sp_die, pf);
1237 else { 1263 else {
1238 if (dwarf_entrypc(sp_die, &pf->addr) != 0) { 1264 if (dwarf_entrypc(sp_die, &pf->addr) != 0) {
1239 pr_warning("Failed to get entry pc of %s.\n", 1265 pr_warning("Failed to get entry address of "
1240 dwarf_diename(sp_die)); 1266 "%s.\n", dwarf_diename(sp_die));
1241 param->retval = -ENOENT; 1267 param->retval = -ENOENT;
1242 return DWARF_CB_ABORT; 1268 return DWARF_CB_ABORT;
1243 } 1269 }
@@ -1279,7 +1305,7 @@ static int find_probes(int fd, struct probe_finder *pf)
1279 1305
1280 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias); 1306 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
1281 if (!dbg) { 1307 if (!dbg) {
1282 pr_warning("No dwarf info found in the vmlinux - " 1308 pr_warning("No debug information found in the vmlinux - "
1283 "please rebuild with CONFIG_DEBUG_INFO=y.\n"); 1309 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1284 return -EBADF; 1310 return -EBADF;
1285 } 1311 }
@@ -1524,7 +1550,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
1524 /* Open the live linux kernel */ 1550 /* Open the live linux kernel */
1525 dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias); 1551 dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
1526 if (!dbg) { 1552 if (!dbg) {
1527 pr_warning("No dwarf info found in the vmlinux - " 1553 pr_warning("No debug information found in the vmlinux - "
1528 "please rebuild with CONFIG_DEBUG_INFO=y.\n"); 1554 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1529 ret = -EINVAL; 1555 ret = -EINVAL;
1530 goto end; 1556 goto end;
@@ -1534,7 +1560,8 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
1534 addr += bias; 1560 addr += bias;
1535 /* Find cu die */ 1561 /* Find cu die */
1536 if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) { 1562 if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
1537 pr_warning("No CU DIE is found at %lx\n", addr); 1563 pr_warning("Failed to find debug information for address %lx\n",
1564 addr);
1538 ret = -EINVAL; 1565 ret = -EINVAL;
1539 goto end; 1566 goto end;
1540 } 1567 }
@@ -1659,7 +1686,7 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
1659 1686
1660 line_list__init(&lf->lr->line_list); 1687 line_list__init(&lf->lr->line_list);
1661 if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) { 1688 if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
1662 pr_warning("No source lines found in this CU.\n"); 1689 pr_warning("No source lines found.\n");
1663 return -ENOENT; 1690 return -ENOENT;
1664 } 1691 }
1665 1692
@@ -1784,7 +1811,7 @@ int find_line_range(int fd, struct line_range *lr)
1784 1811
1785 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias); 1812 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
1786 if (!dbg) { 1813 if (!dbg) {
1787 pr_warning("No dwarf info found in the vmlinux - " 1814 pr_warning("No debug information found in the vmlinux - "
1788 "please rebuild with CONFIG_DEBUG_INFO=y.\n"); 1815 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1789 return -EBADF; 1816 return -EBADF;
1790 } 1817 }
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index bba69d455699..beaefc3c1223 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
34 bool externs); 34 bool externs);
35 35
36#include <dwarf.h> 36#include <dwarf.h>
37#include <libdw.h> 37#include <elfutils/libdw.h>
38#include <libdwfl.h> 38#include <elfutils/libdwfl.h>
39#include <version.h> 39#include <elfutils/version.h>
40 40
41struct probe_finder { 41struct probe_finder {
42 struct perf_probe_event *pev; /* Target probe event */ 42 struct perf_probe_event *pev; /* Target probe event */
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index b059dc50cc2d..93680818e244 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * trace-event-perl. Feed perf trace events to an embedded Perl interpreter. 2 * trace-event-perl. Feed perf script events to an embedded Perl interpreter.
3 * 3 *
4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com> 4 * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
5 * 5 *
@@ -411,8 +411,8 @@ static int perl_generate_script(const char *outfile)
411 return -1; 411 return -1;
412 } 412 }
413 413
414 fprintf(ofp, "# perf trace event handlers, " 414 fprintf(ofp, "# perf script event handlers, "
415 "generated by perf trace -g perl\n"); 415 "generated by perf script -g perl\n");
416 416
417 fprintf(ofp, "# Licensed under the terms of the GNU GPL" 417 fprintf(ofp, "# Licensed under the terms of the GNU GPL"
418 " License version 2\n\n"); 418 " License version 2\n\n");
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 33a632523743..c6d99334bdfa 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -442,8 +442,8 @@ static int python_generate_script(const char *outfile)
442 fprintf(stderr, "couldn't open %s\n", fname); 442 fprintf(stderr, "couldn't open %s\n", fname);
443 return -1; 443 return -1;
444 } 444 }
445 fprintf(ofp, "# perf trace event handlers, " 445 fprintf(ofp, "# perf script event handlers, "
446 "generated by perf trace -g python\n"); 446 "generated by perf script -g python\n");
447 447
448 fprintf(ofp, "# Licensed under the terms of the GNU GPL" 448 fprintf(ofp, "# Licensed under the terms of the GNU GPL"
449 " License version 2\n\n"); 449 " License version 2\n\n");
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index fa9d652c2dc3..6fb4694d05fa 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -65,9 +65,49 @@ out_close:
65 return -1; 65 return -1;
66} 66}
67 67
68static void perf_session__id_header_size(struct perf_session *session)
69{
70 struct sample_data *data;
71 u64 sample_type = session->sample_type;
72 u16 size = 0;
73
74 if (!session->sample_id_all)
75 goto out;
76
77 if (sample_type & PERF_SAMPLE_TID)
78 size += sizeof(data->tid) * 2;
79
80 if (sample_type & PERF_SAMPLE_TIME)
81 size += sizeof(data->time);
82
83 if (sample_type & PERF_SAMPLE_ID)
84 size += sizeof(data->id);
85
86 if (sample_type & PERF_SAMPLE_STREAM_ID)
87 size += sizeof(data->stream_id);
88
89 if (sample_type & PERF_SAMPLE_CPU)
90 size += sizeof(data->cpu) * 2;
91out:
92 session->id_hdr_size = size;
93}
94
95void perf_session__set_sample_id_all(struct perf_session *session, bool value)
96{
97 session->sample_id_all = value;
98 perf_session__id_header_size(session);
99}
100
101void perf_session__set_sample_type(struct perf_session *session, u64 type)
102{
103 session->sample_type = type;
104}
105
68void perf_session__update_sample_type(struct perf_session *self) 106void perf_session__update_sample_type(struct perf_session *self)
69{ 107{
70 self->sample_type = perf_header__sample_type(&self->header); 108 self->sample_type = perf_header__sample_type(&self->header);
109 self->sample_id_all = perf_header__sample_id_all(&self->header);
110 perf_session__id_header_size(self);
71} 111}
72 112
73int perf_session__create_kernel_maps(struct perf_session *self) 113int perf_session__create_kernel_maps(struct perf_session *self)
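
[Editor's note] perf_session__id_header_size above sums a fixed-size field for each selected PERF_SAMPLE_* bit; that total is appended to every non-sample event when sample_id_all is set. A self-contained sketch of the same arithmetic, with local stand-ins for the ABI bit values (which match the kernel's, as far as I can verify):

#include <stdint.h>

enum {
        SK_SAMPLE_TID       = 1U << 1,
        SK_SAMPLE_TIME      = 1U << 2,
        SK_SAMPLE_ID        = 1U << 6,
        SK_SAMPLE_CPU       = 1U << 7,
        SK_SAMPLE_STREAM_ID = 1U << 9,
};

static uint16_t id_header_size_sketch(uint64_t sample_type, int sample_id_all)
{
        uint16_t size = 0;

        if (!sample_id_all)
                return 0;
        if (sample_type & SK_SAMPLE_TID)
                size += 2 * sizeof(uint32_t);   /* pid + tid */
        if (sample_type & SK_SAMPLE_TIME)
                size += sizeof(uint64_t);
        if (sample_type & SK_SAMPLE_ID)
                size += sizeof(uint64_t);
        if (sample_type & SK_SAMPLE_STREAM_ID)
                size += sizeof(uint64_t);
        if (sample_type & SK_SAMPLE_CPU)
                size += 2 * sizeof(uint32_t);   /* cpu + reserved */
        return size;
}
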
@@ -85,7 +125,9 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self)
85 machines__destroy_guest_kernel_maps(&self->machines); 125 machines__destroy_guest_kernel_maps(&self->machines);
86} 126}
87 127
88struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe) 128struct perf_session *perf_session__new(const char *filename, int mode,
129 bool force, bool repipe,
130 struct perf_event_ops *ops)
89{ 131{
90 size_t len = filename ? strlen(filename) + 1 : 0; 132 size_t len = filename ? strlen(filename) + 1 : 0;
91 struct perf_session *self = zalloc(sizeof(*self) + len); 133 struct perf_session *self = zalloc(sizeof(*self) + len);
@@ -101,10 +143,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
101 INIT_LIST_HEAD(&self->dead_threads); 143 INIT_LIST_HEAD(&self->dead_threads);
102 self->hists_tree = RB_ROOT; 144 self->hists_tree = RB_ROOT;
103 self->last_match = NULL; 145 self->last_match = NULL;
104 self->mmap_window = 32; 146 /*
147 * On 64bit we can mmap the data file in one go. No need for tiny mmap
148 * slices. On 32bit we use 32MB.
149 */
150#if BITS_PER_LONG == 64
151 self->mmap_window = ULLONG_MAX;
152#else
153 self->mmap_window = 32 * 1024 * 1024ULL;
154#endif
105 self->machines = RB_ROOT; 155 self->machines = RB_ROOT;
106 self->repipe = repipe; 156 self->repipe = repipe;
107 INIT_LIST_HEAD(&self->ordered_samples.samples_head); 157 INIT_LIST_HEAD(&self->ordered_samples.samples);
158 INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
159 INIT_LIST_HEAD(&self->ordered_samples.to_free);
108 machine__init(&self->host_machine, "", HOST_KERNEL_ID); 160 machine__init(&self->host_machine, "", HOST_KERNEL_ID);
109 161
110 if (mode == O_RDONLY) { 162 if (mode == O_RDONLY) {
@@ -120,6 +172,13 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
120 } 172 }
121 173
122 perf_session__update_sample_type(self); 174 perf_session__update_sample_type(self);
175
176 if (ops && ops->ordering_requires_timestamps &&
177 ops->ordered_samples && !self->sample_id_all) {
178 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
179 ops->ordered_samples = false;
180 }
181
123out: 182out:
124 return self; 183 return self;
125out_free: 184out_free:
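
[Editor's note] The mmap_window change above is the key sizing decision: on 64-bit the whole data file fits in one mapping, on 32-bit the address space forces 32MB slices. A sketch of that choice as a function; it tests ULONG_MAX as a userland proxy for the BITS_PER_LONG check used in the hunk:

#include <limits.h>
#include <stdint.h>

static uint64_t pick_mmap_window(void)
{
#if ULONG_MAX == 0xffffffffUL   /* 32-bit userland: map in 32MB slices */
        return 32 * 1024 * 1024ULL;
#else                           /* 64-bit: map the whole file at once */
        return UINT64_MAX;
#endif
}
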
@@ -230,7 +289,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
230 return syms; 289 return syms;
231} 290}
232 291
292static int process_event_synth_stub(event_t *event __used,
293 struct perf_session *session __used)
294{
295 dump_printf(": unhandled!\n");
296 return 0;
297}
298
233static int process_event_stub(event_t *event __used, 299static int process_event_stub(event_t *event __used,
300 struct sample_data *sample __used,
234 struct perf_session *session __used) 301 struct perf_session *session __used)
235{ 302{
236 dump_printf(": unhandled!\n"); 303 dump_printf(": unhandled!\n");
@@ -262,7 +329,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
262 if (handler->exit == NULL) 329 if (handler->exit == NULL)
263 handler->exit = process_event_stub; 330 handler->exit = process_event_stub;
264 if (handler->lost == NULL) 331 if (handler->lost == NULL)
265 handler->lost = process_event_stub; 332 handler->lost = event__process_lost;
266 if (handler->read == NULL) 333 if (handler->read == NULL)
267 handler->read = process_event_stub; 334 handler->read = process_event_stub;
268 if (handler->throttle == NULL) 335 if (handler->throttle == NULL)
@@ -270,13 +337,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
270 if (handler->unthrottle == NULL) 337 if (handler->unthrottle == NULL)
271 handler->unthrottle = process_event_stub; 338 handler->unthrottle = process_event_stub;
272 if (handler->attr == NULL) 339 if (handler->attr == NULL)
273 handler->attr = process_event_stub; 340 handler->attr = process_event_synth_stub;
274 if (handler->event_type == NULL) 341 if (handler->event_type == NULL)
275 handler->event_type = process_event_stub; 342 handler->event_type = process_event_synth_stub;
276 if (handler->tracing_data == NULL) 343 if (handler->tracing_data == NULL)
277 handler->tracing_data = process_event_stub; 344 handler->tracing_data = process_event_synth_stub;
278 if (handler->build_id == NULL) 345 if (handler->build_id == NULL)
279 handler->build_id = process_event_stub; 346 handler->build_id = process_event_synth_stub;
280 if (handler->finished_round == NULL) { 347 if (handler->finished_round == NULL) {
281 if (handler->ordered_samples) 348 if (handler->ordered_samples)
282 handler->finished_round = process_finished_round; 349 handler->finished_round = process_finished_round;
@@ -386,33 +453,61 @@ static event__swap_op event__swap_ops[] = {
386 453
387struct sample_queue { 454struct sample_queue {
388 u64 timestamp; 455 u64 timestamp;
389 struct sample_event *event; 456 u64 file_offset;
457 event_t *event;
390 struct list_head list; 458 struct list_head list;
391}; 459};
392 460
461static void perf_session_free_sample_buffers(struct perf_session *session)
462{
463 struct ordered_samples *os = &session->ordered_samples;
464
465 while (!list_empty(&os->to_free)) {
466 struct sample_queue *sq;
467
468 sq = list_entry(os->to_free.next, struct sample_queue, list);
469 list_del(&sq->list);
470 free(sq);
471 }
472}
473
474static int perf_session_deliver_event(struct perf_session *session,
475 event_t *event,
476 struct sample_data *sample,
477 struct perf_event_ops *ops,
478 u64 file_offset);
479
393static void flush_sample_queue(struct perf_session *s, 480static void flush_sample_queue(struct perf_session *s,
394 struct perf_event_ops *ops) 481 struct perf_event_ops *ops)
395{ 482{
396 struct list_head *head = &s->ordered_samples.samples_head; 483 struct ordered_samples *os = &s->ordered_samples;
397 u64 limit = s->ordered_samples.next_flush; 484 struct list_head *head = &os->samples;
398 struct sample_queue *tmp, *iter; 485 struct sample_queue *tmp, *iter;
486 struct sample_data sample;
487 u64 limit = os->next_flush;
488 u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
399 489
400 if (!ops->ordered_samples || !limit) 490 if (!ops->ordered_samples || !limit)
401 return; 491 return;
402 492
403 list_for_each_entry_safe(iter, tmp, head, list) { 493 list_for_each_entry_safe(iter, tmp, head, list) {
404 if (iter->timestamp > limit) 494 if (iter->timestamp > limit)
405 return; 495 break;
406 496
407 if (iter == s->ordered_samples.last_inserted) 497 event__parse_sample(iter->event, s, &sample);
408 s->ordered_samples.last_inserted = NULL; 498 perf_session_deliver_event(s, iter->event, &sample, ops,
499 iter->file_offset);
409 500
410 ops->sample((event_t *)iter->event, s); 501 os->last_flush = iter->timestamp;
411
412 s->ordered_samples.last_flush = iter->timestamp;
413 list_del(&iter->list); 502 list_del(&iter->list);
414 free(iter->event); 503 list_add(&iter->list, &os->sample_cache);
415 free(iter); 504 }
505
506 if (list_empty(head)) {
507 os->last_sample = NULL;
508 } else if (last_ts <= limit) {
509 os->last_sample =
510 list_entry(head->prev, struct sample_queue, list);
416 } 511 }
417} 512}
418 513
@@ -465,178 +560,265 @@ static int process_finished_round(event_t *event __used,
465 return 0; 560 return 0;
466} 561}
467 562
468static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
469{
470 struct sample_queue *iter;
471
472 list_for_each_entry_reverse(iter, head, list) {
473 if (iter->timestamp < new->timestamp) {
474 list_add(&new->list, &iter->list);
475 return;
476 }
477 }
478
479 list_add(&new->list, head);
480}
481
482static void __queue_sample_before(struct sample_queue *new,
483 struct sample_queue *iter,
484 struct list_head *head)
485{
486 list_for_each_entry_continue_reverse(iter, head, list) {
487 if (iter->timestamp < new->timestamp) {
488 list_add(&new->list, &iter->list);
489 return;
490 }
491 }
492
493 list_add(&new->list, head);
494}
495
496static void __queue_sample_after(struct sample_queue *new,
497 struct sample_queue *iter,
498 struct list_head *head)
499{
500 list_for_each_entry_continue(iter, head, list) {
501 if (iter->timestamp > new->timestamp) {
502 list_add_tail(&new->list, &iter->list);
503 return;
504 }
505 }
506 list_add_tail(&new->list, head);
507}
508
509/* The queue is ordered by time */ 563/* The queue is ordered by time */
510static void __queue_sample_event(struct sample_queue *new, 564static void __queue_event(struct sample_queue *new, struct perf_session *s)
511 struct perf_session *s)
512{ 565{
513 struct sample_queue *last_inserted = s->ordered_samples.last_inserted; 566 struct ordered_samples *os = &s->ordered_samples;
514 struct list_head *head = &s->ordered_samples.samples_head; 567 struct sample_queue *sample = os->last_sample;
568 u64 timestamp = new->timestamp;
569 struct list_head *p;
515 570
571 os->last_sample = new;
516 572
517 if (!last_inserted) { 573 if (!sample) {
518 __queue_sample_end(new, head); 574 list_add(&new->list, &os->samples);
575 os->max_timestamp = timestamp;
519 return; 576 return;
520 } 577 }
521 578
522 /* 579 /*
523 * Most of the time the current event has a timestamp 580 * last_sample might point to some random place in the list as it's
524 * very close to the last event inserted, unless we just switched 581 * the last queued event. We expect that the new event is close to
525 * to another event buffer. Having a sorting based on a list and 582 * this.
526 * on the last inserted event that is close to the current one is
527 * probably more efficient than an rbtree based sorting.
528 */ 583 */
529 if (last_inserted->timestamp >= new->timestamp) 584 if (sample->timestamp <= timestamp) {
530 __queue_sample_before(new, last_inserted, head); 585 while (sample->timestamp <= timestamp) {
531 else 586 p = sample->list.next;
532 __queue_sample_after(new, last_inserted, head); 587 if (p == &os->samples) {
588 list_add_tail(&new->list, &os->samples);
589 os->max_timestamp = timestamp;
590 return;
591 }
592 sample = list_entry(p, struct sample_queue, list);
593 }
594 list_add_tail(&new->list, &sample->list);
595 } else {
596 while (sample->timestamp > timestamp) {
597 p = sample->list.prev;
598 if (p == &os->samples) {
599 list_add(&new->list, &os->samples);
600 return;
601 }
602 sample = list_entry(p, struct sample_queue, list);
603 }
604 list_add(&new->list, &sample->list);
605 }
533} 606}
534 607
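
[Editor's note] __queue_event above replaces three insertion helpers with one bidirectional scan that starts at the last inserted node, exploiting the fact that consecutive events usually have nearby timestamps. A sketch of that locality trick on a circular doubly linked list with a sentinel (head->next == head when empty); all names are hypothetical:

#include <stddef.h>
#include <stdint.h>

struct tnode {
        uint64_t ts;
        struct tnode *prev, *next;      /* circular list with sentinel */
};

static void insert_near_hint(struct tnode *head, struct tnode *hint,
                             struct tnode *n)
{
        struct tnode *pos = hint ? hint : head;

        if (pos == head || pos->ts <= n->ts) {
                /* walk forward past everything not newer than n */
                while (pos->next != head && pos->next->ts <= n->ts)
                        pos = pos->next;
        } else {
                /* walk backward past everything newer than n */
                while (pos->prev != head && pos->prev->ts > n->ts)
                        pos = pos->prev;
                pos = pos->prev;
        }
        n->prev = pos;                  /* link n right after pos */
        n->next = pos->next;
        pos->next->prev = n;
        pos->next = n;
}
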
535static int queue_sample_event(event_t *event, struct sample_data *data, 608#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
536 struct perf_session *s) 609
610static int perf_session_queue_event(struct perf_session *s, event_t *event,
611 struct sample_data *data, u64 file_offset)
537{ 612{
613 struct ordered_samples *os = &s->ordered_samples;
614 struct list_head *sc = &os->sample_cache;
538 u64 timestamp = data->time; 615 u64 timestamp = data->time;
539 struct sample_queue *new; 616 struct sample_queue *new;
540 617
618 if (!timestamp || timestamp == ~0ULL)
619 return -ETIME;
541 620
542 if (timestamp < s->ordered_samples.last_flush) { 621 if (timestamp < s->ordered_samples.last_flush) {
543 printf("Warning: Timestamp below last timeslice flush\n"); 622 printf("Warning: Timestamp below last timeslice flush\n");
544 return -EINVAL; 623 return -EINVAL;
545 } 624 }
546 625
547 new = malloc(sizeof(*new)); 626 if (!list_empty(sc)) {
548 if (!new) 627 new = list_entry(sc->next, struct sample_queue, list);
549 return -ENOMEM; 628 list_del(&new->list);
629 } else if (os->sample_buffer) {
630 new = os->sample_buffer + os->sample_buffer_idx;
631 if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
632 os->sample_buffer = NULL;
633 } else {
634 os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
635 if (!os->sample_buffer)
636 return -ENOMEM;
637 list_add(&os->sample_buffer->list, &os->to_free);
638 os->sample_buffer_idx = 2;
639 new = os->sample_buffer + 1;
640 }
550 641
551 new->timestamp = timestamp; 642 new->timestamp = timestamp;
643 new->file_offset = file_offset;
644 new->event = event;
552 645
553 new->event = malloc(event->header.size); 646 __queue_event(new, s);
554 if (!new->event) {
555 free(new);
556 return -ENOMEM;
557 }
558 647
559 memcpy(new->event, event, event->header.size); 648 return 0;
649}
560 650
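
[Editor's note] perf_session_queue_event above also replaces per-event malloc/free with a three-tier allocator: reuse a cached node, else carve from the current batch, else malloc a fresh batch whose slot 0 links the batch into to_free for teardown. A sketch of that ladder under simplified types:

#include <stdint.h>
#include <stdlib.h>

struct entry_sketch {
        uint64_t timestamp;
        struct entry_sketch *next;      /* free-list / batch linkage */
};

#define BATCH (64 * 1024 / sizeof(struct entry_sketch))

struct pool_sketch {
        struct entry_sketch *cache;     /* recycled entries */
        struct entry_sketch *buffer;    /* current batch */
        size_t idx;                     /* next unused slot in buffer */
        struct entry_sketch *to_free;   /* one head entry per batch */
};

static struct entry_sketch *pool_get(struct pool_sketch *p)
{
        struct entry_sketch *e;

        if (p->cache) {
                e = p->cache;
                p->cache = e->next;
        } else if (p->buffer) {
                e = &p->buffer[p->idx];
                if (++p->idx == BATCH)
                        p->buffer = NULL;       /* batch exhausted */
        } else {
                p->buffer = malloc(BATCH * sizeof(*e));
                if (!p->buffer)
                        return NULL;
                p->buffer[0].next = p->to_free; /* remember for free() */
                p->to_free = &p->buffer[0];
                p->idx = 2;
                e = &p->buffer[1];
        }
        return e;
}
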
561 __queue_sample_event(new, s); 651static void callchain__printf(struct sample_data *sample)
562 s->ordered_samples.last_inserted = new; 652{
653 unsigned int i;
563 654
564 if (new->timestamp > s->ordered_samples.max_timestamp) 655 printf("... chain: nr:%Lu\n", sample->callchain->nr);
565 s->ordered_samples.max_timestamp = new->timestamp;
566 656
567 return 0; 657 for (i = 0; i < sample->callchain->nr; i++)
658 printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
568} 659}
569 660
570static int perf_session__process_sample(event_t *event, struct perf_session *s, 661static void perf_session__print_tstamp(struct perf_session *session,
571 struct perf_event_ops *ops) 662 event_t *event,
663 struct sample_data *sample)
572{ 664{
573 struct sample_data data; 665 if (event->header.type != PERF_RECORD_SAMPLE &&
666 !session->sample_id_all) {
667 fputs("-1 -1 ", stdout);
668 return;
669 }
574 670
575 if (!ops->ordered_samples) 671 if ((session->sample_type & PERF_SAMPLE_CPU))
576 return ops->sample(event, s); 672 printf("%u ", sample->cpu);
577 673
578 bzero(&data, sizeof(struct sample_data)); 674 if (session->sample_type & PERF_SAMPLE_TIME)
579 event__parse_sample(event, s->sample_type, &data); 675 printf("%Lu ", sample->time);
676}
580 677
581 queue_sample_event(event, &data, s); 678static void dump_event(struct perf_session *session, event_t *event,
679 u64 file_offset, struct sample_data *sample)
680{
681 if (!dump_trace)
682 return;
582 683
583 return 0; 684 printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
685 event->header.type);
686
687 trace_event(event);
688
689 if (sample)
690 perf_session__print_tstamp(session, event, sample);
691
692 printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
693 event__get_event_name(event->header.type));
584} 694}
585 695
586static int perf_session__process_event(struct perf_session *self, 696static void dump_sample(struct perf_session *session, event_t *event,
587 event_t *event, 697 struct sample_data *sample)
588 struct perf_event_ops *ops,
589 u64 offset, u64 head)
590{ 698{
591 trace_event(event); 699 if (!dump_trace)
700 return;
592 701
593 if (event->header.type < PERF_RECORD_HEADER_MAX) { 702 printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
594 dump_printf("%#Lx [%#x]: PERF_RECORD_%s", 703 sample->pid, sample->tid, sample->ip, sample->period);
595 offset + head, event->header.size,
596 event__name[event->header.type]);
597 hists__inc_nr_events(&self->hists, event->header.type);
598 }
599 704
600 if (self->header.needs_swap && event__swap_ops[event->header.type]) 705 if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
601 event__swap_ops[event->header.type](event); 706 callchain__printf(sample);
707}
708
709static int perf_session_deliver_event(struct perf_session *session,
710 event_t *event,
711 struct sample_data *sample,
712 struct perf_event_ops *ops,
713 u64 file_offset)
714{
715 dump_event(session, event, file_offset, sample);
602 716
603 switch (event->header.type) { 717 switch (event->header.type) {
604 case PERF_RECORD_SAMPLE: 718 case PERF_RECORD_SAMPLE:
605 return perf_session__process_sample(event, self, ops); 719 dump_sample(session, event, sample);
720 return ops->sample(event, sample, session);
606 case PERF_RECORD_MMAP: 721 case PERF_RECORD_MMAP:
607 return ops->mmap(event, self); 722 return ops->mmap(event, sample, session);
608 case PERF_RECORD_COMM: 723 case PERF_RECORD_COMM:
609 return ops->comm(event, self); 724 return ops->comm(event, sample, session);
610 case PERF_RECORD_FORK: 725 case PERF_RECORD_FORK:
611 return ops->fork(event, self); 726 return ops->fork(event, sample, session);
612 case PERF_RECORD_EXIT: 727 case PERF_RECORD_EXIT:
613 return ops->exit(event, self); 728 return ops->exit(event, sample, session);
614 case PERF_RECORD_LOST: 729 case PERF_RECORD_LOST:
615 return ops->lost(event, self); 730 return ops->lost(event, sample, session);
616 case PERF_RECORD_READ: 731 case PERF_RECORD_READ:
617 return ops->read(event, self); 732 return ops->read(event, sample, session);
618 case PERF_RECORD_THROTTLE: 733 case PERF_RECORD_THROTTLE:
619 return ops->throttle(event, self); 734 return ops->throttle(event, sample, session);
620 case PERF_RECORD_UNTHROTTLE: 735 case PERF_RECORD_UNTHROTTLE:
621 return ops->unthrottle(event, self); 736 return ops->unthrottle(event, sample, session);
737 default:
738 ++session->hists.stats.nr_unknown_events;
739 return -1;
740 }
741}
742
743static int perf_session__preprocess_sample(struct perf_session *session,
744 event_t *event, struct sample_data *sample)
745{
746 if (event->header.type != PERF_RECORD_SAMPLE ||
747 !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
748 return 0;
749
750 if (!ip_callchain__valid(sample->callchain, event)) {
751 pr_debug("call-chain problem with event, skipping it.\n");
752 ++session->hists.stats.nr_invalid_chains;
753 session->hists.stats.total_invalid_chains += sample->period;
754 return -EINVAL;
755 }
756 return 0;
757}
758
759static int perf_session__process_user_event(struct perf_session *session, event_t *event,
760 struct perf_event_ops *ops, u64 file_offset)
761{
762 dump_event(session, event, file_offset, NULL);
763
764 /* These events are processed right away */
765 switch (event->header.type) {
622 case PERF_RECORD_HEADER_ATTR: 766 case PERF_RECORD_HEADER_ATTR:
623 return ops->attr(event, self); 767 return ops->attr(event, session);
624 case PERF_RECORD_HEADER_EVENT_TYPE: 768 case PERF_RECORD_HEADER_EVENT_TYPE:
625 return ops->event_type(event, self); 769 return ops->event_type(event, session);
626 case PERF_RECORD_HEADER_TRACING_DATA: 770 case PERF_RECORD_HEADER_TRACING_DATA:
627 /* setup for reading amidst mmap */ 771 /* setup for reading amidst mmap */
628 lseek(self->fd, offset + head, SEEK_SET); 772 lseek(session->fd, file_offset, SEEK_SET);
629 return ops->tracing_data(event, self); 773 return ops->tracing_data(event, session);
630 case PERF_RECORD_HEADER_BUILD_ID: 774 case PERF_RECORD_HEADER_BUILD_ID:
631 return ops->build_id(event, self); 775 return ops->build_id(event, session);
632 case PERF_RECORD_FINISHED_ROUND: 776 case PERF_RECORD_FINISHED_ROUND:
633 return ops->finished_round(event, self, ops); 777 return ops->finished_round(event, session, ops);
634 default: 778 default:
635 ++self->hists.stats.nr_unknown_events; 779 return -EINVAL;
636 return -1;
637 } 780 }
638} 781}
639 782
783static int perf_session__process_event(struct perf_session *session,
784 event_t *event,
785 struct perf_event_ops *ops,
786 u64 file_offset)
787{
788 struct sample_data sample;
789 int ret;
790
791 if (session->header.needs_swap && event__swap_ops[event->header.type])
792 event__swap_ops[event->header.type](event);
793
794 if (event->header.type >= PERF_RECORD_HEADER_MAX)
795 return -EINVAL;
796
797 hists__inc_nr_events(&session->hists, event->header.type);
798
799 if (event->header.type >= PERF_RECORD_USER_TYPE_START)
800 return perf_session__process_user_event(session, event, ops, file_offset);
801
802 /*
803 * For all kernel events we get the sample data
804 */
805 event__parse_sample(event, session, &sample);
806
807 /* Preprocess sample records - precheck callchains */
808 if (perf_session__preprocess_sample(session, event, &sample))
809 return 0;
810
811 if (ops->ordered_samples) {
812 ret = perf_session_queue_event(session, event, &sample,
813 file_offset);
814 if (ret != -ETIME)
815 return ret;
816 }
817
818 return perf_session_deliver_event(session, event, &sample, ops,
819 file_offset);
820}
821
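
[Editor's note] The tail of perf_session__process_event above encodes one subtle routing rule: an event that cannot be timestamped makes the queue return -ETIME, and is then delivered immediately rather than dropped. A control-flow-only sketch; queue() and deliver() are hypothetical callbacks:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

static int route_event_sketch(bool ordered, uint64_t timestamp,
                              int (*queue)(uint64_t), int (*deliver)(void))
{
        if (ordered) {
                int ret = queue(timestamp);

                if (ret != -ETIME)
                        return ret;
                /* no usable timestamp: fall through to direct delivery */
        }
        return deliver();
}
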
640void perf_event_header__bswap(struct perf_event_header *self) 822void perf_event_header__bswap(struct perf_event_header *self)
641{ 823{
642 self->type = bswap_32(self->type); 824 self->type = bswap_32(self->type);
@@ -656,21 +838,33 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
656 return thread; 838 return thread;
657} 839}
658 840
659int do_read(int fd, void *buf, size_t size) 841static void perf_session__warn_about_errors(const struct perf_session *session,
842 const struct perf_event_ops *ops)
660{ 843{
661 void *buf_start = buf; 844 if (ops->lost == event__process_lost &&
662 845 session->hists.stats.total_lost != 0) {
663 while (size) { 846 ui__warning("Processed %Lu events and LOST %Lu!\n\n"
664 int ret = read(fd, buf, size); 847 "Check IO/CPU overload!\n\n",
665 848 session->hists.stats.total_period,
666 if (ret <= 0) 849 session->hists.stats.total_lost);
667 return ret; 850 }
668 851
669 size -= ret; 852 if (session->hists.stats.nr_unknown_events != 0) {
670 buf += ret; 853 ui__warning("Found %u unknown events!\n\n"
854 "Is this an older tool processing a perf.data "
855 "file generated by a more recent tool?\n\n"
856 "If that is not the case, consider "
857 "reporting to linux-kernel@vger.kernel.org.\n\n",
858 session->hists.stats.nr_unknown_events);
671 } 859 }
672 860
673 return buf - buf_start; 861 if (session->hists.stats.nr_invalid_chains != 0) {
862 ui__warning("Found invalid callchains!\n\n"
863 "%u out of %u events were discarded for this reason.\n\n"
864 "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
865 session->hists.stats.nr_invalid_chains,
866 session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
867 }
674} 868}
675 869
676#define session_done() (*(volatile int *)(&session_done)) 870#define session_done() (*(volatile int *)(&session_done))
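
[Editor's note] The removed do_read() survives as readn(), used by the pipe path below. For reference, a sketch of the classic loop it implements, since plain read(2) may legally return fewer bytes than requested:

#include <stddef.h>
#include <unistd.h>

static ssize_t readn_sketch(int fd, void *buf, size_t n)
{
        char *p = buf;
        size_t left = n;

        while (left) {
                ssize_t ret = read(fd, p, left);

                if (ret <= 0)   /* EOF or error, as in the old do_read() */
                        return ret;
                left -= (size_t)ret;
                p += ret;
        }
        return (ssize_t)n;
}
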
@@ -690,7 +884,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
690 884
691 head = 0; 885 head = 0;
692more: 886more:
693 err = do_read(self->fd, &event, sizeof(struct perf_event_header)); 887 err = readn(self->fd, &event, sizeof(struct perf_event_header));
694 if (err <= 0) { 888 if (err <= 0) {
695 if (err == 0) 889 if (err == 0)
696 goto done; 890 goto done;
@@ -710,8 +904,7 @@ more:
710 p += sizeof(struct perf_event_header); 904 p += sizeof(struct perf_event_header);
711 905
712 if (size - sizeof(struct perf_event_header)) { 906 if (size - sizeof(struct perf_event_header)) {
713 err = do_read(self->fd, p, 907 err = readn(self->fd, p, size - sizeof(struct perf_event_header));
714 size - sizeof(struct perf_event_header));
715 if (err <= 0) { 908 if (err <= 0) {
716 if (err == 0) { 909 if (err == 0) {
717 pr_err("unexpected end of event stream\n"); 910 pr_err("unexpected end of event stream\n");
@@ -724,8 +917,7 @@ more:
724 } 917 }
725 918
726 if (size == 0 || 919 if (size == 0 ||
727 (skip = perf_session__process_event(self, &event, ops, 920 (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
728 0, head)) < 0) {
729 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", 921 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
730 head, event.header.size, event.header.type); 922 head, event.header.size, event.header.type);
731 /* 923 /*
@@ -740,9 +932,6 @@ more:
740 932
741 head += size; 933 head += size;
742 934
743 dump_printf("\n%#Lx [%#x]: event: %d\n",
744 head, event.header.size, event.header.type);
745
746 if (skip > 0) 935 if (skip > 0)
747 head += skip; 936 head += skip;
748 937
@@ -751,82 +940,91 @@ more:
751done: 940done:
752 err = 0; 941 err = 0;
753out_err: 942out_err:
943 perf_session__warn_about_errors(self, ops);
944 perf_session_free_sample_buffers(self);
754 return err; 945 return err;
755} 946}
756 947
757int __perf_session__process_events(struct perf_session *self, 948int __perf_session__process_events(struct perf_session *session,
758 u64 data_offset, u64 data_size, 949 u64 data_offset, u64 data_size,
759 u64 file_size, struct perf_event_ops *ops) 950 u64 file_size, struct perf_event_ops *ops)
760{ 951{
761 int err, mmap_prot, mmap_flags; 952 u64 head, page_offset, file_offset, file_pos, progress_next;
762 u64 head, shift; 953 int err, mmap_prot, mmap_flags, map_idx = 0;
763 u64 offset = 0; 954 struct ui_progress *progress;
764 size_t page_size; 955 size_t page_size, mmap_size;
956 char *buf, *mmaps[8];
765 event_t *event; 957 event_t *event;
766 uint32_t size; 958 uint32_t size;
767 char *buf;
768 struct ui_progress *progress = ui_progress__new("Processing events...",
769 self->size);
770 if (progress == NULL)
771 return -1;
772 959
773 perf_event_ops__fill_defaults(ops); 960 perf_event_ops__fill_defaults(ops);
774 961
775 page_size = sysconf(_SC_PAGESIZE); 962 page_size = sysconf(_SC_PAGESIZE);
776 963
777 head = data_offset; 964 page_offset = page_size * (data_offset / page_size);
778 shift = page_size * (head / page_size); 965 file_offset = page_offset;
779 offset += shift; 966 head = data_offset - page_offset;
780 head -= shift; 967
968 if (data_offset + data_size < file_size)
969 file_size = data_offset + data_size;
970
971 progress_next = file_size / 16;
972 progress = ui_progress__new("Processing events...", file_size);
973 if (progress == NULL)
974 return -1;
975
976 mmap_size = session->mmap_window;
977 if (mmap_size > file_size)
978 mmap_size = file_size;
979
980 memset(mmaps, 0, sizeof(mmaps));
781 981
782 mmap_prot = PROT_READ; 982 mmap_prot = PROT_READ;
783 mmap_flags = MAP_SHARED; 983 mmap_flags = MAP_SHARED;
784 984
785 if (self->header.needs_swap) { 985 if (session->header.needs_swap) {
786 mmap_prot |= PROT_WRITE; 986 mmap_prot |= PROT_WRITE;
787 mmap_flags = MAP_PRIVATE; 987 mmap_flags = MAP_PRIVATE;
788 } 988 }
789remap: 989remap:
790 buf = mmap(NULL, page_size * self->mmap_window, mmap_prot, 990 buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
791 mmap_flags, self->fd, offset); 991 file_offset);
792 if (buf == MAP_FAILED) { 992 if (buf == MAP_FAILED) {
793 pr_err("failed to mmap file\n"); 993 pr_err("failed to mmap file\n");
794 err = -errno; 994 err = -errno;
795 goto out_err; 995 goto out_err;
796 } 996 }
997 mmaps[map_idx] = buf;
998 map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
999 file_pos = file_offset + head;
797 1000
798more: 1001more:
799 event = (event_t *)(buf + head); 1002 event = (event_t *)(buf + head);
800 ui_progress__update(progress, offset);
801 1003
802 if (self->header.needs_swap) 1004 if (session->header.needs_swap)
803 perf_event_header__bswap(&event->header); 1005 perf_event_header__bswap(&event->header);
804 size = event->header.size; 1006 size = event->header.size;
805 if (size == 0) 1007 if (size == 0)
806 size = 8; 1008 size = 8;
807 1009
808 if (head + event->header.size >= page_size * self->mmap_window) { 1010 if (head + event->header.size >= mmap_size) {
809 int munmap_ret; 1011 if (mmaps[map_idx]) {
810 1012 munmap(mmaps[map_idx], mmap_size);
811 shift = page_size * (head / page_size); 1013 mmaps[map_idx] = NULL;
812 1014 }
813 munmap_ret = munmap(buf, page_size * self->mmap_window);
814 assert(munmap_ret == 0);
815 1015
816 offset += shift; 1016 page_offset = page_size * (head / page_size);
817 head -= shift; 1017 file_offset += page_offset;
1018 head -= page_offset;
818 goto remap; 1019 goto remap;
819 } 1020 }
820 1021
821 size = event->header.size; 1022 size = event->header.size;
822 1023
823 dump_printf("\n%#Lx [%#x]: event: %d\n",
824 offset + head, event->header.size, event->header.type);
825
826 if (size == 0 || 1024 if (size == 0 ||
827 perf_session__process_event(self, event, ops, offset, head) < 0) { 1025 perf_session__process_event(session, event, ops, file_pos) < 0) {
828 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", 1026 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
829 offset + head, event->header.size, 1027 file_offset + head, event->header.size,
830 event->header.type); 1028 event->header.type);
831 /* 1029 /*
832 * assume we lost track of the stream, check alignment, and 1030 * assume we lost track of the stream, check alignment, and
@@ -839,19 +1037,24 @@ more:
839 } 1037 }
840 1038
841 head += size; 1039 head += size;
1040 file_pos += size;
842 1041
843 if (offset + head >= data_offset + data_size) 1042 if (file_pos >= progress_next) {
844 goto done; 1043 progress_next += file_size / 16;
1044 ui_progress__update(progress, file_pos);
1045 }
845 1046
846 if (offset + head < file_size) 1047 if (file_pos < file_size)
847 goto more; 1048 goto more;
848done: 1049
849 err = 0; 1050 err = 0;
850 /* do the final flush for ordered samples */ 1051 /* do the final flush for ordered samples */
851 self->ordered_samples.next_flush = ULLONG_MAX; 1052 session->ordered_samples.next_flush = ULLONG_MAX;
852 flush_sample_queue(self, ops); 1053 flush_sample_queue(session, ops);
853out_err: 1054out_err:
854 ui_progress__delete(progress); 1055 ui_progress__delete(progress);
1056 perf_session__warn_about_errors(session, ops);
1057 perf_session_free_sample_buffers(session);
855 return err; 1058 return err;
856} 1059}
857 1060
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9fa0fc2a863f..decd83f274fd 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -17,8 +17,12 @@ struct ordered_samples {
17 u64 last_flush; 17 u64 last_flush;
18 u64 next_flush; 18 u64 next_flush;
19 u64 max_timestamp; 19 u64 max_timestamp;
20 struct list_head samples_head; 20 struct list_head samples;
21 struct sample_queue *last_inserted; 21 struct list_head sample_cache;
22 struct list_head to_free;
23 struct sample_queue *sample_buffer;
24 struct sample_queue *last_sample;
25 int sample_buffer_idx;
22}; 26};
23 27
24struct perf_session { 28struct perf_session {
@@ -42,6 +46,8 @@ struct perf_session {
42 int fd; 46 int fd;
43 bool fd_pipe; 47 bool fd_pipe;
44 bool repipe; 48 bool repipe;
49 bool sample_id_all;
50 u16 id_hdr_size;
45 int cwdlen; 51 int cwdlen;
46 char *cwd; 52 char *cwd;
47 struct ordered_samples ordered_samples; 53 struct ordered_samples ordered_samples;
@@ -50,7 +56,9 @@ struct perf_session {
50 56
51struct perf_event_ops; 57struct perf_event_ops;
52 58
53typedef int (*event_op)(event_t *self, struct perf_session *session); 59typedef int (*event_op)(event_t *self, struct sample_data *sample,
60 struct perf_session *session);
61typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
54typedef int (*event_op2)(event_t *self, struct perf_session *session, 62typedef int (*event_op2)(event_t *self, struct perf_session *session,
55 struct perf_event_ops *ops); 63 struct perf_event_ops *ops);
56 64
@@ -63,16 +71,19 @@ struct perf_event_ops {
63 lost, 71 lost,
64 read, 72 read,
65 throttle, 73 throttle,
66 unthrottle, 74 unthrottle;
67 attr, 75 event_synth_op attr,
68 event_type, 76 event_type,
69 tracing_data, 77 tracing_data,
70 build_id; 78 build_id;
71 event_op2 finished_round; 79 event_op2 finished_round;
72 bool ordered_samples; 80 bool ordered_samples;
81 bool ordering_requires_timestamps;
73}; 82};
74 83
75struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe); 84struct perf_session *perf_session__new(const char *filename, int mode,
85 bool force, bool repipe,
86 struct perf_event_ops *ops);
76void perf_session__delete(struct perf_session *self); 87void perf_session__delete(struct perf_session *self);
77 88
78void perf_event_header__bswap(struct perf_event_header *self); 89void perf_event_header__bswap(struct perf_event_header *self);
@@ -98,8 +109,9 @@ void mem_bswap_64(void *src, int byte_size);
98 109
99int perf_session__create_kernel_maps(struct perf_session *self); 110int perf_session__create_kernel_maps(struct perf_session *self);
100 111
101int do_read(int fd, void *buf, size_t size);
102void perf_session__update_sample_type(struct perf_session *self); 112void perf_session__update_sample_type(struct perf_session *self);
113void perf_session__set_sample_id_all(struct perf_session *session, bool value);
114void perf_session__set_sample_type(struct perf_session *session, u64 type);
103void perf_session__remove_thread(struct perf_session *self, struct thread *th); 115void perf_session__remove_thread(struct perf_session *self, struct thread *th);
104 116
105static inline 117static inline
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index b62a553cc67d..f44fa541d56e 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
170 return repsep_snprintf(bf, size, "%-*s", width, dso_name); 170 return repsep_snprintf(bf, size, "%-*s", width, dso_name);
171 } 171 }
172 172
173 return repsep_snprintf(bf, size, "%*Lx", width, self->ip); 173 return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
174} 174}
175 175
176/* --sort symbol */ 176/* --sort symbol */
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
196 196
197 if (verbose) { 197 if (verbose) {
198 char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; 198 char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
199 ret += repsep_snprintf(bf, size, "%*Lx %c ", 199 ret += repsep_snprintf(bf, size, "%-#*llx %c ",
200 BITS_PER_LONG / 4, self->ip, o); 200 BITS_PER_LONG / 4, self->ip, o);
201 } 201 }
202 202
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
205 ret += repsep_snprintf(bf + ret, size - ret, "%s", 205 ret += repsep_snprintf(bf + ret, size - ret, "%s",
206 self->ms.sym->name); 206 self->ms.sym->name);
207 else 207 else
208 ret += repsep_snprintf(bf + ret, size - ret, "%*Lx", 208 ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
209 BITS_PER_LONG / 4, self->ip); 209 BITS_PER_LONG / 4, self->ip);
210 210
211 return ret; 211 return ret;
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 0409fc7c0058..8fc0bd3a3a4a 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -259,7 +259,7 @@ static bool __match_glob(const char *str, const char *pat, bool ignore_space)
259 if (!*pat) /* Tail wild card matches all */ 259 if (!*pat) /* Tail wild card matches all */
260 return true; 260 return true;
261 while (*str) 261 while (*str)
262 if (strglobmatch(str++, pat)) 262 if (__match_glob(str++, pat, ignore_space))
263 return true; 263 return true;
264 } 264 }
265 return !*str && !*pat; 265 return !*str && !*pat;
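
[Editor's note] The one-line fix above matters because the '*' branch previously recursed into the public wrapper and silently dropped the ignore_space flag mid-pattern. A compact sketch of a matcher with the corrected recursion; this is a simplified model, not the full __match_glob() (no character classes, no escapes):

#include <stdbool.h>

static bool match_sketch(const char *str, const char *pat, bool ignore_space)
{
        while (*str && *pat && *pat != '*') {
                if (ignore_space) {
                        while (*str == ' ')
                                str++;
                        while (*pat == ' ')
                                pat++;
                        if (!*str || !*pat || *pat == '*')
                                break;
                }
                if (*pat == '?') {
                        str++;
                        pat++;
                        continue;
                }
                if (*pat++ != *str++)
                        return false;
        }
        if (*pat == '*') {
                while (*pat == '*')
                        pat++;
                if (!*pat)      /* tail wildcard matches everything */
                        return true;
                while (*str)    /* recurse into the worker, keeping the flag */
                        if (match_sketch(str++, pat, ignore_space))
                                return true;
        }
        return !*str && !*pat;
}
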
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index d628c8d1cf5e..15ccfba8cdf8 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -22,6 +22,10 @@
22#include <limits.h> 22#include <limits.h>
23#include <sys/utsname.h> 23#include <sys/utsname.h>
24 24
25#ifndef KSYM_NAME_LEN
26#define KSYM_NAME_LEN 128
27#endif
28
25#ifndef NT_GNU_BUILD_ID 29#ifndef NT_GNU_BUILD_ID
26#define NT_GNU_BUILD_ID 3 30#define NT_GNU_BUILD_ID 3
27#endif 31#endif
@@ -41,6 +45,7 @@ struct symbol_conf symbol_conf = {
41 .exclude_other = true, 45 .exclude_other = true,
42 .use_modules = true, 46 .use_modules = true,
43 .try_vmlinux_path = true, 47 .try_vmlinux_path = true,
48 .symfs = "",
44}; 49};
45 50
46int dso__name_len(const struct dso *self) 51int dso__name_len(const struct dso *self)
@@ -92,7 +97,7 @@ static void symbols__fixup_end(struct rb_root *self)
92 prev = curr; 97 prev = curr;
93 curr = rb_entry(nd, struct symbol, rb_node); 98 curr = rb_entry(nd, struct symbol, rb_node);
94 99
95 if (prev->end == prev->start) 100 if (prev->end == prev->start && prev->end != curr->start)
96 prev->end = curr->start - 1; 101 prev->end = curr->start - 1;
97 } 102 }
98 103
@@ -121,7 +126,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
121 * We still don't have the actual symbols, so guess the 126 * We still don't have the actual symbols, so guess the
122 * last map's final address. 127 * last map's final address.
123 */ 128 */
124 curr->end = ~0UL; 129 curr->end = ~0ULL;
125} 130}
126 131
127static void map_groups__fixup_end(struct map_groups *self) 132static void map_groups__fixup_end(struct map_groups *self)
@@ -425,16 +430,25 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
425 430
426int kallsyms__parse(const char *filename, void *arg, 431int kallsyms__parse(const char *filename, void *arg,
427 int (*process_symbol)(void *arg, const char *name, 432 int (*process_symbol)(void *arg, const char *name,
428 char type, u64 start)) 433 char type, u64 start, u64 end))
429{ 434{
430 char *line = NULL; 435 char *line = NULL;
431 size_t n; 436 size_t n;
432 int err = 0; 437 int err = -1;
438 u64 prev_start = 0;
439 char prev_symbol_type = 0;
440 char *prev_symbol_name;
433 FILE *file = fopen(filename, "r"); 441 FILE *file = fopen(filename, "r");
434 442
435 if (file == NULL) 443 if (file == NULL)
436 goto out_failure; 444 goto out_failure;
437 445
446 prev_symbol_name = malloc(KSYM_NAME_LEN);
447 if (prev_symbol_name == NULL)
448 goto out_close;
449
450 err = 0;
451
438 while (!feof(file)) { 452 while (!feof(file)) {
439 u64 start; 453 u64 start;
440 int line_len, len; 454 int line_len, len;
@@ -454,14 +468,33 @@ int kallsyms__parse(const char *filename, void *arg,
454 continue; 468 continue;
455 469
456 symbol_type = toupper(line[len]); 470 symbol_type = toupper(line[len]);
457 symbol_name = line + len + 2; 471 len += 2;
472 symbol_name = line + len;
473 len = line_len - len;
458 474
459 err = process_symbol(arg, symbol_name, symbol_type, start); 475 if (len >= KSYM_NAME_LEN) {
460 if (err) 476 err = -1;
461 break; 477 break;
478 }
479
480 if (prev_symbol_type) {
481 u64 end = start;
482 if (end != prev_start)
483 --end;
484 err = process_symbol(arg, prev_symbol_name,
485 prev_symbol_type, prev_start, end);
486 if (err)
487 break;
488 }
489
490 memcpy(prev_symbol_name, symbol_name, len + 1);
491 prev_symbol_type = symbol_type;
492 prev_start = start;
462 } 493 }
463 494
495 free(prev_symbol_name);
464 free(line); 496 free(line);
497out_close:
465 fclose(file); 498 fclose(file);
466 return err; 499 return err;
467 500
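
[Editor's note] The kallsyms__parse rework above is a one-symbol lookahead: /proc/kallsyms lists only start addresses, so each symbol is emitted when its successor is read, with its end set to (next_start - 1), except for zero-size symbols sharing an address. A sketch of that state machine; emit() is a hypothetical stand-in for the process_symbol callback:

#include <stdint.h>

struct pending_sketch {
        uint64_t start;
        char type;
        int valid;
};

static void feed_symbol_sketch(struct pending_sketch *prev, uint64_t start,
                               char type,
                               void (*emit)(uint64_t start, uint64_t end,
                                            char type))
{
        if (prev->valid) {
                uint64_t end = start;

                if (end != prev->start) /* zero-size symbol stays zero-size */
                        --end;
                emit(prev->start, end, prev->type);
        }
        prev->start = start;
        prev->type = type;
        prev->valid = 1;
}

This is why the later hunks drop symbols__fixup_end() for the kallsyms path: ends are now known at parse time.
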
@@ -483,7 +516,7 @@ static u8 kallsyms2elf_type(char type)
483} 516}
484 517
485static int map__process_kallsym_symbol(void *arg, const char *name, 518static int map__process_kallsym_symbol(void *arg, const char *name,
486 char type, u64 start) 519 char type, u64 start, u64 end)
487{ 520{
488 struct symbol *sym; 521 struct symbol *sym;
489 struct process_kallsyms_args *a = arg; 522 struct process_kallsyms_args *a = arg;
@@ -492,11 +525,8 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
492 if (!symbol_type__is_a(type, a->map->type)) 525 if (!symbol_type__is_a(type, a->map->type))
493 return 0; 526 return 0;
494 527
495 /* 528 sym = symbol__new(start, end - start + 1,
496 * Will fix up the end later, when we have all symbols sorted. 529 kallsyms2elf_type(type), name);
497 */
498 sym = symbol__new(start, 0, kallsyms2elf_type(type), name);
499
500 if (sym == NULL) 530 if (sym == NULL)
501 return -ENOMEM; 531 return -ENOMEM;
502 /* 532 /*
@@ -649,7 +679,6 @@ int dso__load_kallsyms(struct dso *self, const char *filename,
649 if (dso__load_all_kallsyms(self, filename, map) < 0) 679 if (dso__load_all_kallsyms(self, filename, map) < 0)
650 return -1; 680 return -1;
651 681
652 symbols__fixup_end(&self->symbols[map->type]);
653 if (self->kernel == DSO_TYPE_GUEST_KERNEL) 682 if (self->kernel == DSO_TYPE_GUEST_KERNEL)
654 self->origin = DSO__ORIG_GUEST_KERNEL; 683 self->origin = DSO__ORIG_GUEST_KERNEL;
655 else 684 else
@@ -839,8 +868,11 @@ static int dso__synthesize_plt_symbols(struct dso *self, struct map *map,
839 char sympltname[1024]; 868 char sympltname[1024];
840 Elf *elf; 869 Elf *elf;
841 int nr = 0, symidx, fd, err = 0; 870 int nr = 0, symidx, fd, err = 0;
871 char name[PATH_MAX];
842 872
843 fd = open(self->long_name, O_RDONLY); 873 snprintf(name, sizeof(name), "%s%s",
874 symbol_conf.symfs, self->long_name);
875 fd = open(name, O_RDONLY);
844 if (fd < 0) 876 if (fd < 0)
845 goto out; 877 goto out;
846 878
@@ -1452,16 +1484,19 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1452 self->origin++) { 1484 self->origin++) {
1453 switch (self->origin) { 1485 switch (self->origin) {
1454 case DSO__ORIG_BUILD_ID_CACHE: 1486 case DSO__ORIG_BUILD_ID_CACHE:
1455 if (dso__build_id_filename(self, name, size) == NULL) 1487 /* skip the locally configured cache if a symfs is given */
1488 if (symbol_conf.symfs[0] ||
1489 (dso__build_id_filename(self, name, size) == NULL)) {
1456 continue; 1490 continue;
1491 }
1457 break; 1492 break;
1458 case DSO__ORIG_FEDORA: 1493 case DSO__ORIG_FEDORA:
1459 snprintf(name, size, "/usr/lib/debug%s.debug", 1494 snprintf(name, size, "%s/usr/lib/debug%s.debug",
1460 self->long_name); 1495 symbol_conf.symfs, self->long_name);
1461 break; 1496 break;
1462 case DSO__ORIG_UBUNTU: 1497 case DSO__ORIG_UBUNTU:
1463 snprintf(name, size, "/usr/lib/debug%s", 1498 snprintf(name, size, "%s/usr/lib/debug%s",
1464 self->long_name); 1499 symbol_conf.symfs, self->long_name);
1465 break; 1500 break;
1466 case DSO__ORIG_BUILDID: { 1501 case DSO__ORIG_BUILDID: {
1467 char build_id_hex[BUILD_ID_SIZE * 2 + 1]; 1502 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
@@ -1473,19 +1508,26 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1473 sizeof(self->build_id), 1508 sizeof(self->build_id),
1474 build_id_hex); 1509 build_id_hex);
1475 snprintf(name, size, 1510 snprintf(name, size,
1476 "/usr/lib/debug/.build-id/%.2s/%s.debug", 1511 "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
1477 build_id_hex, build_id_hex + 2); 1512 symbol_conf.symfs, build_id_hex, build_id_hex + 2);
1478 } 1513 }
1479 break; 1514 break;
1480 case DSO__ORIG_DSO: 1515 case DSO__ORIG_DSO:
1481 snprintf(name, size, "%s", self->long_name); 1516 snprintf(name, size, "%s%s",
1517 symbol_conf.symfs, self->long_name);
1482 break; 1518 break;
1483 case DSO__ORIG_GUEST_KMODULE: 1519 case DSO__ORIG_GUEST_KMODULE:
1484 if (map->groups && map->groups->machine) 1520 if (map->groups && map->groups->machine)
1485 root_dir = map->groups->machine->root_dir; 1521 root_dir = map->groups->machine->root_dir;
1486 else 1522 else
1487 root_dir = ""; 1523 root_dir = "";
1488 snprintf(name, size, "%s%s", root_dir, self->long_name); 1524 snprintf(name, size, "%s%s%s", symbol_conf.symfs,
1525 root_dir, self->long_name);
1526 break;
1527
1528 case DSO__ORIG_KMODULE:
1529 snprintf(name, size, "%s%s", symbol_conf.symfs,
1530 self->long_name);
1489 break; 1531 break;
1490 1532
1491 default: 1533 default:
@@ -1780,21 +1822,24 @@ out_failure:
1780 return -1; 1822 return -1;
1781} 1823}
1782 1824
1783static int dso__load_vmlinux(struct dso *self, struct map *map, 1825int dso__load_vmlinux(struct dso *self, struct map *map,
1784 const char *vmlinux, symbol_filter_t filter) 1826 const char *vmlinux, symbol_filter_t filter)
1785{ 1827{
1786 int err = -1, fd; 1828 int err = -1, fd;
1829 char symfs_vmlinux[PATH_MAX];
1787 1830
1788 fd = open(vmlinux, O_RDONLY); 1831 snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s",
1832 symbol_conf.symfs, vmlinux);
1833 fd = open(symfs_vmlinux, O_RDONLY);
1789 if (fd < 0) 1834 if (fd < 0)
1790 return -1; 1835 return -1;
1791 1836
1792 dso__set_loaded(self, map->type); 1837 dso__set_loaded(self, map->type);
1793 err = dso__load_sym(self, map, vmlinux, fd, filter, 0, 0); 1838 err = dso__load_sym(self, map, symfs_vmlinux, fd, filter, 0, 0);
1794 close(fd); 1839 close(fd);
1795 1840
1796 if (err > 0) 1841 if (err > 0)
1797 pr_debug("Using %s for symbols\n", vmlinux); 1842 pr_debug("Using %s for symbols\n", symfs_vmlinux);
1798 1843
1799 return err; 1844 return err;
1800} 1845}
@@ -1836,8 +1881,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
1836 const char *kallsyms_filename = NULL; 1881 const char *kallsyms_filename = NULL;
1837 char *kallsyms_allocated_filename = NULL; 1882 char *kallsyms_allocated_filename = NULL;
1838 /* 1883 /*
1839 * Step 1: if the user specified a vmlinux filename, use it and only 1884 * Step 1: if the user specified a kallsyms or vmlinux filename, use
1840 * it, reporting errors to the user if it cannot be used. 1885 * it and only it, reporting errors to the user if it cannot be used.
1841 * 1886 *
1842 * For instance, try to analyse an ARM perf.data file _without_ a 1887 * For instance, try to analyse an ARM perf.data file _without_ a
1843 * build-id, or if the user specifies the wrong path to the right 1888 * build-id, or if the user specifies the wrong path to the right
@@ -1850,6 +1895,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
1850 * validation in dso__load_vmlinux and will bail out if they don't 1895 * validation in dso__load_vmlinux and will bail out if they don't
1851 * match. 1896 * match.
1852 */ 1897 */
1898 if (symbol_conf.kallsyms_name != NULL) {
1899 kallsyms_filename = symbol_conf.kallsyms_name;
1900 goto do_kallsyms;
1901 }
1902
1853 if (symbol_conf.vmlinux_name != NULL) { 1903 if (symbol_conf.vmlinux_name != NULL) {
1854 err = dso__load_vmlinux(self, map, 1904 err = dso__load_vmlinux(self, map,
1855 symbol_conf.vmlinux_name, filter); 1905 symbol_conf.vmlinux_name, filter);
@@ -1867,6 +1917,10 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
1867 goto out_fixup; 1917 goto out_fixup;
1868 } 1918 }
1869 1919
1920 /* do not try local files if a symfs was given */
1921 if (symbol_conf.symfs[0] != 0)
1922 return -1;
1923
1870 /* 1924 /*
1871 * Say the kernel DSO was created when processing the build-id header table, 1925 * Say the kernel DSO was created when processing the build-id header table,
1872 * we have a build-id, so check if it is the same as the running kernel, 1926 * we have a build-id, so check if it is the same as the running kernel,
@@ -2136,7 +2190,7 @@ struct process_args {
2136}; 2190};
2137 2191
2138static int symbol__in_kernel(void *arg, const char *name, 2192static int symbol__in_kernel(void *arg, const char *name,
2139 char type __used, u64 start) 2193 char type __used, u64 start, u64 end __used)
2140{ 2194{
2141 struct process_args *args = arg; 2195 struct process_args *args = arg;
2142 2196
@@ -2257,9 +2311,6 @@ static int vmlinux_path__init(void)
2257 struct utsname uts; 2311 struct utsname uts;
2258 char bf[PATH_MAX]; 2312 char bf[PATH_MAX];
2259 2313
2260 if (uname(&uts) < 0)
2261 return -1;
2262
2263 vmlinux_path = malloc(sizeof(char *) * 5); 2314 vmlinux_path = malloc(sizeof(char *) * 5);
2264 if (vmlinux_path == NULL) 2315 if (vmlinux_path == NULL)
2265 return -1; 2316 return -1;
@@ -2272,6 +2323,14 @@ static int vmlinux_path__init(void)
2272 if (vmlinux_path[vmlinux_path__nr_entries] == NULL) 2323 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
2273 goto out_fail; 2324 goto out_fail;
2274 ++vmlinux_path__nr_entries; 2325 ++vmlinux_path__nr_entries;
2326
2327 /* only try running kernel version if no symfs was given */
2328 if (symbol_conf.symfs[0] != 0)
2329 return 0;
2330
2331 if (uname(&uts) < 0)
2332 return -1;
2333
2275 snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release); 2334 snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
2276 vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); 2335 vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
2277 if (vmlinux_path[vmlinux_path__nr_entries] == NULL) 2336 if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
@@ -2331,6 +2390,8 @@ static int setup_list(struct strlist **list, const char *list_str,
2331 2390
2332int symbol__init(void) 2391int symbol__init(void)
2333{ 2392{
2393 const char *symfs;
2394
2334 if (symbol_conf.initialized) 2395 if (symbol_conf.initialized)
2335 return 0; 2396 return 0;
2336 2397
@@ -2359,6 +2420,18 @@ int symbol__init(void)
2359 symbol_conf.sym_list_str, "symbol") < 0) 2420 symbol_conf.sym_list_str, "symbol") < 0)
2360 goto out_free_comm_list; 2421 goto out_free_comm_list;
2361 2422
2423 /*
2424 * A path to symbols of "/" is identical to ""
2425 * reset here for simplicity.
2426 */
2427 symfs = realpath(symbol_conf.symfs, NULL);
2428 if (symfs == NULL)
2429 symfs = symbol_conf.symfs;
2430 if (strcmp(symfs, "/") == 0)
2431 symbol_conf.symfs = "";
2432 if (symfs != symbol_conf.symfs)
2433 free((void *)symfs);
2434
2362 symbol_conf.initialized = true; 2435 symbol_conf.initialized = true;
2363 return 0; 2436 return 0;
2364 2437
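With this change kallsyms__parse() hands each callback both the start and the end of a symbol, computed from the next symbol's start, so callers no longer need a separate end-fixup pass. A minimal sketch of a callback against the new signature; the print_symbol() helper and the local u64 typedef are illustrative stand-ins, not part of the patch:

	#include <stdio.h>
	#include <inttypes.h>

	typedef uint64_t u64;	/* stand-in for perf's own u64 typedef */

	/* hypothetical callback matching the new process_symbol signature */
	static int print_symbol(void *arg, const char *name, char type,
				u64 start, u64 end)
	{
		(void)arg;
		printf("%c %#" PRIx64 "-%#" PRIx64 " %s\n", type, start, end, name);
		return 0;	/* a non-zero return stops the parse */
	}

	/* usage, with the perf headers in scope:
	 *	kallsyms__parse("/proc/kallsyms", NULL, print_symbol);
	 */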
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 038f2201ee09..670cd1c88f54 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -72,6 +72,7 @@ struct symbol_conf {
 			show_cpu_utilization,
 			initialized;
 	const char	*vmlinux_name,
+			*kallsyms_name,
 			*source_prefix,
 			*field_sep;
 	const char	*default_guest_vmlinux_name,
@@ -85,6 +86,7 @@ struct symbol_conf {
 	struct strlist	*dso_list,
 			*comm_list,
 			*sym_list;
+	const char	*symfs;
 };
 
 extern struct symbol_conf symbol_conf;
@@ -166,6 +168,8 @@ void dso__sort_by_name(struct dso *self, enum map_type type);
 struct dso *__dsos__findnew(struct list_head *head, const char *name);
 
 int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
+int dso__load_vmlinux(struct dso *self, struct map *map,
+		      const char *vmlinux, symbol_filter_t filter);
 int dso__load_vmlinux_path(struct dso *self, struct map *map,
 			   symbol_filter_t filter);
 int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
@@ -213,7 +217,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 int build_id__sprintf(const u8 *self, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
-					  char type, u64 start));
+					  char type, u64 start, u64 end));
 
 void machine__destroy_kernel_maps(struct machine *self);
 int __machine__create_kernel_maps(struct machine *self, struct dso *kernel);
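Together, the new symfs and kallsyms_name fields let a front end resolve every image against an offline tree instead of the local filesystem. A sketch of the setup a tool might do before symbol__init(); the helper and both paths are examples only:

	#include "util/symbol.h"	/* symbol_conf, symbol__init() */

	/* hypothetical setup helper; the paths are illustrative */
	static int setup_offline_symbols(void)
	{
		symbol_conf.symfs         = "/srv/sysroots/target";	/* prefixed to every image path */
		symbol_conf.kallsyms_name = "/srv/sysroots/kallsyms";	/* use this file, and only it */

		return symbol__init();
	}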
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 8c72d888e449..00f4eade2e3e 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -16,35 +16,50 @@ static int filter(const struct dirent *dir)
 	return 1;
 }
 
-int find_all_tid(int pid, pid_t ** all_tid)
+struct thread_map *thread_map__new_by_pid(pid_t pid)
 {
+	struct thread_map *threads;
 	char name[256];
 	int items;
 	struct dirent **namelist = NULL;
-	int ret = 0;
 	int i;
 
 	sprintf(name, "/proc/%d/task", pid);
 	items = scandir(name, &namelist, filter, NULL);
 	if (items <= 0)
-		return -ENOENT;
-	*all_tid = malloc(sizeof(pid_t) * items);
-	if (!*all_tid) {
-		ret = -ENOMEM;
-		goto failure;
-	}
-
-	for (i = 0; i < items; i++)
-		(*all_tid)[i] = atoi(namelist[i]->d_name);
+		return NULL;
 
-	ret = items;
+	threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+	if (threads != NULL) {
+		for (i = 0; i < items; i++)
+			threads->map[i] = atoi(namelist[i]->d_name);
+		threads->nr = items;
+	}
 
-failure:
 	for (i=0; i<items; i++)
 		free(namelist[i]);
 	free(namelist);
 
-	return ret;
+	return threads;
+}
+
+struct thread_map *thread_map__new_by_tid(pid_t tid)
+{
+	struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+
+	if (threads != NULL) {
+		threads->map[0] = tid;
+		threads->nr = 1;
+	}
+
+	return threads;
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+{
+	if (pid != -1)
+		return thread_map__new_by_pid(pid);
+	return thread_map__new_by_tid(tid);
 }
 
 static struct thread *thread__new(pid_t pid)
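The thread_map constructors replace find_all_tid()'s out-parameter with a single self-describing allocation that the caller frees with thread_map__delete(). A sketch of the new calling convention; count_threads() is an illustrative caller, not part of the patch:

	#include "util/thread.h"	/* thread_map__new(), thread_map__delete() */

	/* hypothetical caller: enumerate a process's tasks, or fall back to one tid */
	static int count_threads(pid_t pid, pid_t tid)
	{
		struct thread_map *threads = thread_map__new(pid, tid);
		int nr;

		if (threads == NULL)
			return -1;	/* /proc scan failed or out of memory */

		nr = threads->nr;
		thread_map__delete(threads);
		return nr;
	}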
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 688500ff826f..d7574101054a 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,11 +18,24 @@ struct thread {
 	int			comm_len;
 };
 
+struct thread_map {
+	int nr;
+	int map[];
+};
+
 struct perf_session;
 
 void thread__delete(struct thread *self);
 
-int find_all_tid(int pid, pid_t ** all_tid);
+struct thread_map *thread_map__new_by_pid(pid_t pid);
+struct thread_map *thread_map__new_by_tid(pid_t tid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+
+static inline void thread_map__delete(struct thread_map *threads)
+{
+	free(threads);
+}
+
 int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
 struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index b1572601286c..35729f4c40cb 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -34,11 +34,13 @@
 #include <ctype.h>
 #include <errno.h>
 #include <stdbool.h>
+#include <linux/list.h>
 #include <linux/kernel.h>
 
 #include "../perf.h"
 #include "trace-event.h"
 #include "debugfs.h"
+#include "evsel.h"
 
 #define VERSION "0.5"
 
@@ -469,16 +471,17 @@ out:
 }
 
 static struct tracepoint_path *
-get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
+get_tracepoints_path(struct list_head *pattrs)
 {
 	struct tracepoint_path path, *ppath = &path;
-	int i, nr_tracepoints = 0;
+	struct perf_evsel *pos;
+	int nr_tracepoints = 0;
 
-	for (i = 0; i < nb_events; i++) {
-		if (pattrs[i].type != PERF_TYPE_TRACEPOINT)
+	list_for_each_entry(pos, pattrs, node) {
+		if (pos->attr.type != PERF_TYPE_TRACEPOINT)
 			continue;
 		++nr_tracepoints;
-		ppath->next = tracepoint_id_to_path(pattrs[i].config);
+		ppath->next = tracepoint_id_to_path(pos->attr.config);
 		if (!ppath->next)
 			die("%s\n", "No memory to alloc tracepoints list");
 		ppath = ppath->next;
@@ -487,21 +490,21 @@ get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
 	return nr_tracepoints > 0 ? path.next : NULL;
 }
 
-bool have_tracepoints(struct perf_event_attr *pattrs, int nb_events)
+bool have_tracepoints(struct list_head *pattrs)
 {
-	int i;
+	struct perf_evsel *pos;
 
-	for (i = 0; i < nb_events; i++)
-		if (pattrs[i].type == PERF_TYPE_TRACEPOINT)
+	list_for_each_entry(pos, pattrs, node)
+		if (pos->attr.type == PERF_TYPE_TRACEPOINT)
 			return true;
 
 	return false;
 }
 
-int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
+int read_tracing_data(int fd, struct list_head *pattrs)
 {
 	char buf[BUFSIZ];
-	struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events);
+	struct tracepoint_path *tps = get_tracepoints_path(pattrs);
 
 	/*
 	 * What? No tracepoints? No sense writing anything here, bail out.
@@ -545,14 +548,13 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
 	return 0;
 }
 
-ssize_t read_tracing_data_size(int fd, struct perf_event_attr *pattrs,
-			       int nb_events)
+ssize_t read_tracing_data_size(int fd, struct list_head *pattrs)
 {
 	ssize_t size;
 	int err = 0;
 
 	calc_data_size = 1;
-	err = read_tracing_data(fd, pattrs, nb_events);
+	err = read_tracing_data(fd, pattrs);
 	size = calc_data_size - 1;
 	calc_data_size = 0;
 
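read_tracing_data() and have_tracepoints() now walk a list of struct perf_evsel entries, linked through their node member, instead of a bare attribute array. A sketch of a caller, assuming an evsel list built elsewhere with that layout; maybe_write_tracing_data() is hypothetical:

	#include <linux/list.h>

	/* hypothetical caller: only emit tracing data when tracepoints are present */
	static int maybe_write_tracing_data(int fd, struct list_head *evsel_list)
	{
		if (!have_tracepoints(evsel_list))
			return 0;	/* nothing to record for pure hw/sw events */

		return read_tracing_data(fd, evsel_list);
	}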
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index b3e86b1e4444..b5f12ca24d99 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -262,9 +262,8 @@ raw_field_value(struct event *event, const char *name, void *data);
 void *raw_field_ptr(struct event *event, const char *name, void *data);
 unsigned long long eval_flag(const char *flag);
 
-int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
-ssize_t read_tracing_data_size(int fd, struct perf_event_attr *pattrs,
-			       int nb_events);
+int read_tracing_data(int fd, struct list_head *pattrs);
+ssize_t read_tracing_data_size(int fd, struct list_head *pattrs);
 
 /* taken from kernel/trace/trace.h */
 enum trace_flag_type {
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 056c69521a38..7b5a8926624e 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -104,10 +104,24 @@ out_destroy_form:
 	return rc;
 }
 
-static const char yes[] = "Yes", no[] = "No";
+static const char yes[] = "Yes", no[] = "No",
+		  warning_str[] = "Warning!", ok[] = "Ok";
 
 bool ui__dialog_yesno(const char *msg)
 {
 	/* newtWinChoice should really be accepting const char pointers... */
 	return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1;
 }
+
+void ui__warning(const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	if (use_browser > 0)
+		newtWinMessagev((char *)warning_str, (char *)ok,
+				(char *)format, args);
+	else
+		vfprintf(stderr, format, args);
+	va_end(args);
+}
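ui__warning() gives call sites a single printf-style entry point that routes to a newt popup under the TUI and to stderr otherwise. A sketch of a call site; report_open_failure() is an illustrative helper, not part of the patch:

	#include <errno.h>
	#include <string.h>

	/* hypothetical call site: the same message serves both UI modes */
	static void report_open_failure(const char *filename)
	{
		ui__warning("failed to open %s: %s\n", filename, strerror(errno));
	}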
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 214265674ddd..5b3ea49aa63e 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -114,3 +114,20 @@ unsigned long convert_unit(unsigned long value, char *unit)
 
 	return value;
 }
+
+int readn(int fd, void *buf, size_t n)
+{
+	void *buf_start = buf;
+
+	while (n) {
+		int ret = read(fd, buf, n);
+
+		if (ret <= 0)
+			return ret;
+
+		n -= ret;
+		buf += ret;
+	}
+
+	return buf - buf_start;
+}
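Unlike a bare read(2), readn() keeps reading until it has all n bytes, returning early only on error or end of file. A sketch of reading a fixed-size header with it; read_header() is an illustrative helper, not part of the patch:

	/* hypothetical helper: fail on short files instead of silently truncating */
	static int read_header(int fd, void *hdr, size_t hdr_sz)
	{
		int ret = readn(fd, hdr, hdr_sz);

		if (ret < 0)
			return ret;			/* read(2) error */

		return (size_t)ret == hdr_sz ? 0 : -1;	/* -1: hit EOF early */
	}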
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 7562707ddd1c..e833f26f3bfc 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -265,6 +265,7 @@ void argv_free(char **argv);
 bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
 unsigned long convert_unit(unsigned long value, char *unit);
+int readn(int fd, void *buf, size_t size);
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
new file mode 100644
index 000000000000..22afbf6c536a
--- /dev/null
+++ b/tools/perf/util/xyarray.c
@@ -0,0 +1,20 @@
+#include "xyarray.h"
+#include "util.h"
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
+{
+	size_t row_size = ylen * entry_size;
+	struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
+
+	if (xy != NULL) {
+		xy->entry_size = entry_size;
+		xy->row_size = row_size;
+	}
+
+	return xy;
+}
+
+void xyarray__delete(struct xyarray *xy)
+{
+	free(xy);
+}
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
new file mode 100644
index 000000000000..c488a07275dd
--- /dev/null
+++ b/tools/perf/util/xyarray.h
@@ -0,0 +1,20 @@
+#ifndef _PERF_XYARRAY_H_
+#define _PERF_XYARRAY_H_ 1
+
+#include <sys/types.h>
+
+struct xyarray {
+	size_t row_size;
+	size_t entry_size;
+	char contents[];
+};
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
+void xyarray__delete(struct xyarray *xy);
+
+static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+{
+	return &xy->contents[x * xy->row_size + y * xy->entry_size];
+}
+
+#endif /* _PERF_XYARRAY_H_ */
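xyarray packs a two-dimensional table of fixed-size entries into one flat allocation, indexed by row and column. A sketch of a cpus-by-threads table of file descriptors, the kind of use the evsel code is presumably meant to make of it; alloc_fd_table() and its dimensions are illustrative:

	#include "xyarray.h"

	/* hypothetical helper: one allocation backs the whole ncpus x nthreads table */
	static struct xyarray *alloc_fd_table(int ncpus, int nthreads)
	{
		struct xyarray *fds = xyarray__new(ncpus, nthreads, sizeof(int));

		if (fds != NULL) {
			int cpu, thread;

			/* mark every slot as "not yet opened" */
			for (cpu = 0; cpu < ncpus; cpu++)
				for (thread = 0; thread < nthreads; thread++)
					*(int *)xyarray__entry(fds, cpu, thread) = -1;
		}

		return fds;
	}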