aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Documentation/kernel-parameters.txt4
-rw-r--r--Documentation/networking/00-INDEX6
-rw-r--r--Documentation/networking/dns_resolver.txt9
-rw-r--r--Documentation/rtc.txt29
-rw-r--r--Documentation/spinlocks.txt24
-rw-r--r--Documentation/trace/ftrace-design.txt7
-rw-r--r--Documentation/trace/ftrace.txt151
-rw-r--r--Documentation/trace/kprobetrace.txt16
-rw-r--r--MAINTAINERS31
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/alpha/include/asm/futex.h29
-rw-r--r--arch/alpha/include/asm/rwsem.h36
-rw-r--r--arch/alpha/kernel/irq.c13
-rw-r--r--arch/alpha/kernel/irq_alpha.c11
-rw-r--r--arch/alpha/kernel/irq_i8259.c18
-rw-r--r--arch/alpha/kernel/irq_impl.h8
-rw-r--r--arch/alpha/kernel/irq_pyxis.c20
-rw-r--r--arch/alpha/kernel/irq_srm.c16
-rw-r--r--arch/alpha/kernel/osf_sys.c36
-rw-r--r--arch/alpha/kernel/sys_alcor.c28
-rw-r--r--arch/alpha/kernel/sys_cabriolet.c16
-rw-r--r--arch/alpha/kernel/sys_dp264.c52
-rw-r--r--arch/alpha/kernel/sys_eb64p.c18
-rw-r--r--arch/alpha/kernel/sys_eiger.c14
-rw-r--r--arch/alpha/kernel/sys_jensen.c24
-rw-r--r--arch/alpha/kernel/sys_marvel.c42
-rw-r--r--arch/alpha/kernel/sys_mikasa.c16
-rw-r--r--arch/alpha/kernel/sys_noritake.c16
-rw-r--r--arch/alpha/kernel/sys_rawhide.c17
-rw-r--r--arch/alpha/kernel/sys_rx164.c16
-rw-r--r--arch/alpha/kernel/sys_sable.c20
-rw-r--r--arch/alpha/kernel/sys_takara.c14
-rw-r--r--arch/alpha/kernel/sys_titan.c22
-rw-r--r--arch/alpha/kernel/sys_wildfire.c32
-rw-r--r--arch/alpha/kernel/time.c8
-rw-r--r--arch/arm/common/Kconfig2
-rw-r--r--arch/arm/include/asm/futex.h29
-rw-r--r--arch/arm/include/asm/mach/arch.h4
-rw-r--r--arch/arm/include/asm/pgalloc.h2
-rw-r--r--arch/arm/kernel/hw_breakpoint.c26
-rw-r--r--arch/arm/kernel/ptrace.c6
-rw-r--r--arch/arm/kernel/time.c4
-rw-r--r--arch/arm/mach-clps711x/include/mach/time.h2
-rw-r--r--arch/arm/mach-davinci/cpufreq.c2
-rw-r--r--arch/arm/mach-davinci/devices-da8xx.c7
-rw-r--r--arch/arm/mach-davinci/gpio-tnetv107x.c18
-rw-r--r--arch/arm/mach-davinci/include/mach/clkdev.h2
-rw-r--r--arch/arm/mach-omap2/clkt_dpll.c2
-rw-r--r--arch/arm/mach-omap2/mailbox.c12
-rw-r--r--arch/arm/mach-omap2/mux.c2
-rw-r--r--arch/arm/mach-omap2/pm-debug.c8
-rw-r--r--arch/arm/mach-omap2/prcm_mpu44xx.h4
-rw-r--r--arch/arm/mach-omap2/smartreflex.c37
-rw-r--r--arch/arm/mach-omap2/timer-gp.c13
-rw-r--r--arch/arm/mach-pxa/pxa25x.c1
-rw-r--r--arch/arm/mach-pxa/tosa-bt.c2
-rw-r--r--arch/arm/mach-pxa/tosa.c6
-rw-r--r--arch/arm/mach-s3c2440/Kconfig1
-rw-r--r--arch/arm/mach-s3c2440/include/mach/gta02.h26
-rw-r--r--arch/arm/mach-s3c64xx/clock.c6
-rw-r--r--arch/arm/mach-s3c64xx/dma.c11
-rw-r--r--arch/arm/mach-s3c64xx/gpiolib.c4
-rw-r--r--arch/arm/mach-s3c64xx/mach-smdk6410.c13
-rw-r--r--arch/arm/mach-s3c64xx/setup-keypad.c2
-rw-r--r--arch/arm/mach-s3c64xx/setup-sdhci.c2
-rw-r--r--arch/arm/mach-s5p64x0/include/mach/gpio.h4
-rw-r--r--arch/arm/mach-shmobile/board-ag5evm.c1
-rw-r--r--arch/arm/mach-shmobile/board-ap4evb.c2
-rw-r--r--arch/arm/mach-shmobile/board-mackerel.c2
-rw-r--r--arch/arm/mach-shmobile/clock-sh73a0.c17
-rw-r--r--arch/arm/mach-shmobile/include/mach/head-ap4evb.txt10
-rw-r--r--arch/arm/mach-shmobile/include/mach/head-mackerel.txt10
-rw-r--r--arch/arm/mach-tegra/include/mach/kbc.h1
-rw-r--r--arch/arm/plat-omap/mailbox.c11
-rw-r--r--arch/arm/plat-samsung/dev-uart.c2
-rw-r--r--arch/blackfin/kernel/time.c6
-rw-r--r--arch/blackfin/lib/outs.S16
-rw-r--r--arch/blackfin/mach-common/cache.S2
-rw-r--r--arch/cris/arch-v10/kernel/time.c4
-rw-r--r--arch/cris/arch-v32/kernel/smp.c4
-rw-r--r--arch/cris/arch-v32/kernel/time.c6
-rw-r--r--arch/cris/kernel/vmlinux.lds.S5
-rw-r--r--arch/frv/include/asm/futex.h5
-rw-r--r--arch/frv/kernel/futex.c14
-rw-r--r--arch/frv/kernel/time.c14
-rw-r--r--arch/h8300/kernel/time.c4
-rw-r--r--arch/h8300/kernel/timer/timer8.c2
-rw-r--r--arch/ia64/include/asm/futex.h15
-rw-r--r--arch/ia64/include/asm/rwsem.h37
-rw-r--r--arch/ia64/include/asm/xen/hypercall.h2
-rw-r--r--arch/ia64/kernel/time.c19
-rw-r--r--arch/ia64/xen/suspend.c9
-rw-r--r--arch/ia64/xen/time.c13
-rw-r--r--arch/m32r/kernel/time.c5
-rw-r--r--arch/m68k/bvme6000/config.c4
-rw-r--r--arch/m68k/kernel/time.c4
-rw-r--r--arch/m68k/mvme147/config.c4
-rw-r--r--arch/m68k/mvme16x/config.c4
-rw-r--r--arch/m68k/sun3/sun3ints.c2
-rw-r--r--arch/m68knommu/kernel/time.c8
-rw-r--r--arch/microblaze/include/asm/futex.h31
-rw-r--r--arch/mips/Kconfig4
-rw-r--r--arch/mips/alchemy/mtx-1/board_setup.c4
-rw-r--r--arch/mips/alchemy/mtx-1/platform.c9
-rw-r--r--arch/mips/alchemy/xxs1500/board_setup.c4
-rw-r--r--arch/mips/include/asm/futex.h39
-rw-r--r--arch/mips/include/asm/perf_event.h12
-rw-r--r--arch/mips/kernel/ftrace.c179
-rw-r--r--arch/mips/kernel/perf_event.c345
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c4
-rw-r--r--arch/mips/kernel/signal.c2
-rw-r--r--arch/mips/kernel/signal32.c2
-rw-r--r--arch/mips/kernel/smp.c31
-rw-r--r--arch/mips/kernel/syscall.c5
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mips/loongson/Kconfig5
-rw-r--r--arch/mips/loongson/common/cmdline.c5
-rw-r--r--arch/mips/loongson/common/machtype.c3
-rw-r--r--arch/mips/math-emu/ieee754int.h4
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/mips/mm/tlbex.c2
-rw-r--r--arch/mips/pci/ops-pmcmsp.c4
-rw-r--r--arch/mips/pmc-sierra/Kconfig4
-rw-r--r--arch/mips/pmc-sierra/msp71xx/msp_time.c2
-rw-r--r--arch/mn10300/include/asm/atomic.h2
-rw-r--r--arch/mn10300/include/asm/uaccess.h5
-rw-r--r--arch/mn10300/kernel/time.c6
-rw-r--r--arch/mn10300/mm/cache-inv-icache.c4
-rw-r--r--arch/parisc/hpux/sys_hpux.c65
-rw-r--r--arch/parisc/include/asm/futex.h24
-rw-r--r--arch/parisc/kernel/time.c7
-rw-r--r--arch/powerpc/include/asm/futex.h27
-rw-r--r--arch/powerpc/include/asm/lppaca.h16
-rw-r--r--arch/powerpc/include/asm/machdep.h6
-rw-r--r--arch/powerpc/include/asm/rwsem.h51
-rw-r--r--arch/powerpc/kernel/machine_kexec.c5
-rw-r--r--arch/powerpc/kernel/paca.c14
-rw-r--r--arch/powerpc/kernel/process.c8
-rw-r--r--arch/powerpc/mm/numa.c3
-rw-r--r--arch/powerpc/mm/tlb_hash64.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c2
-rw-r--r--arch/powerpc/platforms/iseries/dt.c6
-rw-r--r--arch/powerpc/platforms/iseries/setup.c1
-rw-r--r--arch/s390/include/asm/futex.h12
-rw-r--r--arch/s390/include/asm/rwsem.h63
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/lib/uaccess.h8
-rw-r--r--arch/s390/lib/uaccess_pt.c17
-rw-r--r--arch/s390/lib/uaccess_std.c8
-rw-r--r--arch/sh/include/asm/futex-irq.h24
-rw-r--r--arch/sh/include/asm/futex.h11
-rw-r--r--arch/sh/include/asm/rwsem.h56
-rw-r--r--arch/sh/include/asm/sections.h2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7750.c13
-rw-r--r--arch/sh/lib/delay.c10
-rw-r--r--arch/sh/mm/cache.c3
-rw-r--r--arch/sparc/include/asm/futex_64.h20
-rw-r--r--arch/sparc/include/asm/rwsem.h46
-rw-r--r--arch/sparc/kernel/pcic.c4
-rw-r--r--arch/sparc/kernel/time_32.c9
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/tile/include/asm/futex.h27
-rw-r--r--arch/um/Kconfig.common1
-rw-r--r--arch/um/Kconfig.x865
-rw-r--r--arch/um/drivers/mconsole_kern.c21
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/um/kernel/irq.c31
-rw-r--r--arch/x86/Kconfig6
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c7
-rw-r--r--arch/x86/ia32/ia32entry.S32
-rw-r--r--arch/x86/include/asm/acpi.h11
-rw-r--r--arch/x86/include/asm/amd_nb.h14
-rw-r--r--arch/x86/include/asm/apic.h36
-rw-r--r--arch/x86/include/asm/ce4100.h6
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/entry_arch.h5
-rw-r--r--arch/x86/include/asm/frame.h6
-rw-r--r--arch/x86/include/asm/futex.h22
-rw-r--r--arch/x86/include/asm/hw_irq.h24
-rw-r--r--arch/x86/include/asm/init.h6
-rw-r--r--arch/x86/include/asm/ipi.h8
-rw-r--r--arch/x86/include/asm/irq_vectors.h45
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/mpspec.h3
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/include/asm/nmi.h1
-rw-r--r--arch/x86/include/asm/numa.h52
-rw-r--r--arch/x86/include/asm/numa_32.h7
-rw-r--r--arch/x86/include/asm/numa_64.h23
-rw-r--r--arch/x86/include/asm/page_types.h9
-rw-r--r--arch/x86/include/asm/perf_event_p4.h1
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/smp.h20
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h2
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/include/asm/topology.h19
-rw-r--r--arch/x86/include/asm/unistd_32.h5
-rw-r--r--arch/x86/include/asm/unistd_64.h6
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h15
-rw-r--r--arch/x86/include/asm/xen/page.h47
-rw-r--r--arch/x86/include/asm/xen/pci.h8
-rw-r--r--arch/x86/kernel/acpi/boot.c22
-rw-r--r--arch/x86/kernel/amd_nb.c84
-rw-r--r--arch/x86/kernel/apb_timer.c2
-rw-r--r--arch/x86/kernel/aperture_64.c33
-rw-r--r--arch/x86/kernel/apic/apic.c76
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/apic_noop.c26
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c34
-rw-r--r--arch/x86/kernel/apic/es7000_32.c35
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c123
-rw-r--r--arch/x86/kernel/apic/ipi.c12
-rw-r--r--arch/x86/kernel/apic/numaq_32.c21
-rw-r--r--arch/x86/kernel/apic/probe_32.c10
-rw-r--r--arch/x86/kernel/apic/summit_32.c47
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c65
-rw-r--r--arch/x86/kernel/asm-offsets_32.c69
-rw-r--r--arch/x86/kernel/asm-offsets_64.c90
-rw-r--r--arch/x86/kernel/check.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c61
-rw-r--r--arch/x86/kernel/cpu/common.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c13
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c80
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c170
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c175
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c417
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c97
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c19
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/dumpstack.c25
-rw-r--r--arch/x86/kernel/e820.c10
-rw-r--r--arch/x86/kernel/early-quirks.c16
-rw-r--r--arch/x86/kernel/entry_32.S11
-rw-r--r--arch/x86/kernel/entry_64.S13
-rw-r--r--arch/x86/kernel/ftrace.c15
-rw-r--r--arch/x86/kernel/head_32.S8
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/ioport.c20
-rw-r--r--arch/x86/kernel/irq.c82
-rw-r--r--arch/x86/kernel/irqinit.c83
-rw-r--r--arch/x86/kernel/kgdb.c9
-rw-r--r--arch/x86/kernel/kprobes.c8
-rw-r--r--arch/x86/kernel/microcode_amd.c188
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c51
-rw-r--r--arch/x86/kernel/setup_percpu.c11
-rw-r--r--arch/x86/kernel/smpboot.c113
-rw-r--r--arch/x86/kernel/syscall_table_32.S3
-rw-r--r--arch/x86/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/kvm/svm.c2
-rw-r--r--arch/x86/kvm/trace.h8
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S6
-rw-r--r--arch/x86/lib/checksum_32.S63
-rw-r--r--arch/x86/lib/memmove_64.S197
-rw-r--r--arch/x86/lib/memmove_64.c192
-rw-r--r--arch/x86/lib/rwsem_64.S56
-rw-r--r--arch/x86/lib/semaphore_32.S38
-rw-r--r--arch/x86/lib/thunk_32.S18
-rw-r--r--arch/x86/lib/thunk_64.S27
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/amdtopology_64.c142
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/init.c56
-rw-r--r--arch/x86/mm/init_32.c11
-rw-r--r--arch/x86/mm/init_64.c78
-rw-r--r--arch/x86/mm/numa.c212
-rw-r--r--arch/x86/mm/numa_32.c10
-rw-r--r--arch/x86/mm/numa_64.c988
-rw-r--r--arch/x86/mm/numa_emulation.c494
-rw-r--r--arch/x86/mm/numa_internal.h31
-rw-r--r--arch/x86/mm/pageattr.c18
-rw-r--r--arch/x86/mm/pgtable.c11
-rw-r--r--arch/x86/mm/srat_32.c6
-rw-r--r--arch/x86/mm/srat_64.c367
-rw-r--r--arch/x86/mm/tlb.c14
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/ce4100.c7
-rw-r--r--arch/x86/pci/xen.c159
-rw-r--r--arch/x86/platform/ce4100/ce4100.c2
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c3
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/platform/uv/uv_irq.c4
-rw-r--r--arch/x86/platform/visws/visws_quirks.c4
-rw-r--r--arch/x86/xen/Kconfig8
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c84
-rw-r--r--arch/x86/xen/p2m.c330
-rw-r--r--arch/x86/xen/setup.c68
-rw-r--r--arch/x86/xen/smp.c38
-rw-r--r--arch/x86/xen/suspend.c8
-rw-r--r--arch/x86/xen/time.c4
-rw-r--r--arch/x86/xen/xen-head.S4
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--arch/xtensa/include/asm/rwsem.h37
-rw-r--r--arch/xtensa/kernel/time.c6
-rw-r--r--block/blk-core.c18
-rw-r--r--block/blk-flush.c8
-rw-r--r--block/blk-lib.c21
-rw-r--r--block/blk-throttle.c29
-rw-r--r--block/cfq-iosched.c6
-rw-r--r--block/elevator.c4
-rw-r--r--block/genhd.c2
-rw-r--r--block/ioctl.c8
-rw-r--r--drivers/acpi/acpica/aclocal.h7
-rw-r--r--drivers/acpi/acpica/evgpe.c17
-rw-r--r--drivers/acpi/acpica/evxfgpe.c42
-rw-r--r--drivers/acpi/debugfs.c20
-rw-r--r--drivers/acpi/numa.c9
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--drivers/block/loop.c5
-rw-r--r--drivers/block/xen-blkfront.c87
-rw-r--r--drivers/bluetooth/ath3k.c3
-rw-r--r--drivers/bluetooth/btusb.c9
-rw-r--r--drivers/char/agp/amd64-agp.c9
-rw-r--r--drivers/char/agp/intel-agp.h1
-rw-r--r--drivers/char/agp/intel-gtt.c56
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c8
-rw-r--r--drivers/char/mmtimer.c30
-rw-r--r--drivers/char/pcmcia/cm4000_cs.c3
-rw-r--r--drivers/char/pcmcia/ipwireless/main.c52
-rw-r--r--drivers/char/tpm/tpm.c10
-rw-r--r--drivers/char/virtio_console.c8
-rw-r--r--drivers/cpufreq/cpufreq.c27
-rw-r--r--drivers/gpio/ml_ioh_gpio.c1
-rw-r--r--drivers/gpio/pch_gpio.c1
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c4
-rw-r--r--drivers/gpu/drm/drm_irq.c29
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c4
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c11
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c17
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h24
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gem_tiling.c21
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c6
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h12
-rw-r--r--drivers/gpu/drm/i915/intel_display.c103
-rw-r--r--drivers/gpu/drm/i915/intel_panel.c36
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h13
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_notifier.c11
-rw-r--r--drivers/gpu/drm/nouveau/nv50_instmem.c8
-rw-r--r--drivers/gpu/drm/nouveau/nv50_vm.c4
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c3
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/r100.c26
-rw-r--r--drivers/gpu/drm/radeon/r600.c3
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c14
-rw-r--r--drivers/gpu/drm/radeon/rs600.c1
-rw-r--r--drivers/gpu/drm/radeon/rs690.c1
-rw-r--r--drivers/gpu/drm/radeon/rv770.c3
-rw-r--r--drivers/hwmon/ad7414.c1
-rw-r--r--drivers/hwmon/adt7411.c1
-rw-r--r--drivers/hwmon/f71882fg.c4
-rw-r--r--drivers/i2c/busses/i2c-eg20t.c1
-rw-r--r--drivers/i2c/busses/i2c-ocores.c2
-rw-r--r--drivers/i2c/busses/i2c-omap.c39
-rw-r--r--drivers/i2c/busses/i2c-stu300.c2
-rw-r--r--drivers/idle/intel_idle.c24
-rw-r--r--drivers/input/gameport/gameport.c2
-rw-r--r--drivers/input/keyboard/tegra-kbc.c62
-rw-r--r--drivers/input/mouse/synaptics.h23
-rw-r--r--drivers/input/serio/serio.c2
-rw-r--r--drivers/isdn/hardware/eicon/istream.c2
-rw-r--r--drivers/md/linear.c1
-rw-r--r--drivers/md/md.c31
-rw-r--r--drivers/md/md.h2
-rw-r--r--drivers/md/multipath.c1
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/md/raid1.c6
-rw-r--r--drivers/md/raid10.c7
-rw-r--r--drivers/md/raid5.c1
-rw-r--r--drivers/media/common/tuners/tda8290.c14
-rw-r--r--drivers/media/dvb/dvb-usb/dib0700_devices.c21
-rw-r--r--drivers/media/dvb/dvb-usb/lmedm04.c6
-rw-r--r--drivers/media/dvb/frontends/dib7000m.c19
-rw-r--r--drivers/media/dvb/frontends/dib7000m.h15
-rw-r--r--drivers/media/dvb/mantis/mantis_pci.c1
-rw-r--r--drivers/media/rc/ir-raw.c3
-rw-r--r--drivers/media/rc/mceusb.c27
-rw-r--r--drivers/media/rc/nuvoton-cir.c5
-rw-r--r--drivers/media/rc/nuvoton-cir.h7
-rw-r--r--drivers/media/rc/rc-main.c2
-rw-r--r--drivers/media/video/au0828/au0828-video.c28
-rw-r--r--drivers/media/video/cx18/cx18-cards.c50
-rw-r--r--drivers/media/video/cx18/cx18-driver.c25
-rw-r--r--drivers/media/video/cx18/cx18-driver.h3
-rw-r--r--drivers/media/video/cx18/cx18-dvb.c38
-rw-r--r--drivers/media/video/cx23885/cx23885-i2c.c10
-rw-r--r--drivers/media/video/cx25840/cx25840-core.c3
-rw-r--r--drivers/media/video/ivtv/ivtv-irq.c58
-rw-r--r--drivers/media/video/mem2mem_testdev.c1
-rw-r--r--drivers/media/video/s2255drv.c10
-rw-r--r--drivers/mfd/asic3.c4
-rw-r--r--drivers/mfd/davinci_voicecodec.c4
-rw-r--r--drivers/mfd/tps6586x.c10
-rw-r--r--drivers/mfd/ucb1x00-ts.c12
-rw-r--r--drivers/mfd/wm8994-core.c18
-rw-r--r--drivers/misc/bmp085.c1
-rw-r--r--drivers/mmc/core/core.c2
-rw-r--r--drivers/mmc/core/sdio.c3
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0001.c43
-rw-r--r--drivers/mtd/chips/jedec_probe.c35
-rw-r--r--drivers/mtd/maps/amd76xrom.c1
-rw-r--r--drivers/mtd/mtd_blkdevs.c1
-rw-r--r--drivers/mtd/nand/omap2.c2
-rw-r--r--drivers/mtd/onenand/generic.c2
-rw-r--r--drivers/mtd/onenand/omap2.c2
-rw-r--r--drivers/net/ariadne.c5
-rw-r--r--drivers/net/bnx2x/bnx2x.h31
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.c87
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.h29
-rw-r--r--drivers/net/bnx2x/bnx2x_ethtool.c39
-rw-r--r--drivers/net/bnx2x/bnx2x_init.h2
-rw-r--r--drivers/net/bnx2x/bnx2x_main.c37
-rw-r--r--drivers/net/bnx2x/bnx2x_stats.c4
-rw-r--r--drivers/net/bonding/bond_3ad.c32
-rw-r--r--drivers/net/bonding/bond_3ad.h3
-rw-r--r--drivers/net/can/softing/softing_main.c1
-rw-r--r--drivers/net/cnic.c33
-rw-r--r--drivers/net/davinci_emac.c2
-rw-r--r--drivers/net/dm9000.c9
-rw-r--r--drivers/net/dnet.c3
-rw-r--r--drivers/net/e1000/e1000_osdep.h3
-rw-r--r--drivers/net/e1000e/netdev.c3
-rw-r--r--drivers/net/fec.c3
-rw-r--r--drivers/net/igbvf/vf.c2
-rw-r--r--drivers/net/macb.c2
-rw-r--r--drivers/net/macvtap.c3
-rw-r--r--drivers/net/pcmcia/fmvj18x_cs.c1
-rw-r--r--drivers/net/r6040.c115
-rw-r--r--drivers/net/r8169.c48
-rw-r--r--drivers/net/sfc/ethtool.c22
-rw-r--r--drivers/net/skge.c3
-rw-r--r--drivers/net/smsc911x.c5
-rw-r--r--drivers/net/usb/dm9601.c4
-rw-r--r--drivers/net/wireless/ath/ath5k/phy.c143
-rw-r--r--drivers/net/wireless/ath/ath9k/ath9k.h6
-rw-r--r--drivers/net/wireless/ath/ath9k/hif_usb.c9
-rw-r--r--drivers/net/wireless/ath/ath9k/init.c8
-rw-r--r--drivers/net/wireless/ath/ath9k/mac.c5
-rw-r--r--drivers/net/wireless/ath/ath9k/main.c8
-rw-r--r--drivers/net/wireless/ath/carl9170/usb.c2
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-5000.c2
-rw-r--r--drivers/net/wireless/p54/p54pci.c14
-rw-r--r--drivers/net/wireless/p54/p54usb.c1
-rw-r--r--drivers/net/wireless/rndis_wlan.c3
-rw-r--r--drivers/net/wireless/rt2x00/rt2800pci.c8
-rw-r--r--drivers/net/wireless/rt2x00/rt2800usb.c6
-rw-r--r--drivers/nfc/Kconfig2
-rw-r--r--drivers/nfc/pn544.c4
-rw-r--r--drivers/of/pdt.c112
-rw-r--r--drivers/pci/xen-pcifront.c31
-rw-r--r--drivers/pcmcia/pcmcia_resource.c2
-rw-r--r--drivers/pcmcia/pxa2xx_base.c2
-rw-r--r--drivers/pcmcia/pxa2xx_base.h1
-rw-r--r--drivers/pcmcia/pxa2xx_colibri.c3
-rw-r--r--drivers/pcmcia/pxa2xx_lubbock.c1
-rw-r--r--drivers/pps/generators/Kconfig2
-rw-r--r--drivers/pps/kapi.c2
-rw-r--r--drivers/rapidio/rio-sysfs.c12
-rw-r--r--drivers/regulator/mc13xxx-regulator-core.c2
-rw-r--r--drivers/regulator/wm831x-dcdc.c1
-rw-r--r--drivers/rtc/class.c7
-rw-r--r--drivers/rtc/interface.c180
-rw-r--r--drivers/rtc/rtc-at91rm9200.c28
-rw-r--r--drivers/rtc/rtc-at91sam9.c30
-rw-r--r--drivers/rtc/rtc-bfin.c27
-rw-r--r--drivers/rtc/rtc-cmos.c66
-rw-r--r--drivers/rtc/rtc-davinci.c55
-rw-r--r--drivers/rtc/rtc-ds1511.c17
-rw-r--r--drivers/rtc/rtc-ds1553.c17
-rw-r--r--drivers/rtc/rtc-ds3232.c32
-rw-r--r--drivers/rtc/rtc-jz4740.c7
-rw-r--r--drivers/rtc/rtc-mc13xxx.c7
-rw-r--r--drivers/rtc/rtc-mpc5121.c20
-rw-r--r--drivers/rtc/rtc-mrst.c20
-rw-r--r--drivers/rtc/rtc-mxc.c7
-rw-r--r--drivers/rtc/rtc-nuc900.c15
-rw-r--r--drivers/rtc/rtc-omap.c39
-rw-r--r--drivers/rtc/rtc-pcap.c6
-rw-r--r--drivers/rtc/rtc-pcf50633.c22
-rw-r--r--drivers/rtc/rtc-pl030.c6
-rw-r--r--drivers/rtc/rtc-pl031.c55
-rw-r--r--drivers/rtc/rtc-proc.c8
-rw-r--r--drivers/rtc/rtc-pxa.c44
-rw-r--r--drivers/rtc/rtc-rs5c372.c52
-rw-r--r--drivers/rtc/rtc-rx8025.c25
-rw-r--r--drivers/rtc/rtc-s3c.c41
-rw-r--r--drivers/rtc/rtc-sa1100.c160
-rw-r--r--drivers/rtc/rtc-sh.c24
-rw-r--r--drivers/rtc/rtc-stmp3xxx.c15
-rw-r--r--drivers/rtc/rtc-test.c13
-rw-r--r--drivers/rtc/rtc-twl.c13
-rw-r--r--drivers/rtc/rtc-vr41xx.c32
-rw-r--r--drivers/rtc/rtc-wm831x.c16
-rw-r--r--drivers/rtc/rtc-wm8350.c21
-rw-r--r--drivers/s390/block/xpram.c4
-rw-r--r--drivers/s390/char/keyboard.c3
-rw-r--r--drivers/s390/char/tape.h8
-rw-r--r--drivers/s390/char/tape_34xx.c59
-rw-r--r--drivers/s390/char/tape_3590.c83
-rw-r--r--drivers/scsi/scsi_lib.c2
-rw-r--r--drivers/scsi/scsi_transport_fc.c2
-rw-r--r--drivers/target/target_core_tmr.c5
-rw-r--r--drivers/target/target_core_transport.c8
-rw-r--r--drivers/thermal/Kconfig1
-rw-r--r--drivers/thermal/thermal_sys.c40
-rw-r--r--drivers/tty/serial/serial_cs.c1
-rw-r--r--drivers/usb/core/hub.c18
-rw-r--r--drivers/usb/core/quirks.c8
-rw-r--r--drivers/usb/gadget/f_phonet.c15
-rw-r--r--drivers/usb/host/ehci-xilinx-of.c1
-rw-r--r--drivers/usb/host/xhci-dbg.c9
-rw-r--r--drivers/usb/host/xhci-mem.c10
-rw-r--r--drivers/usb/host/xhci-ring.c40
-rw-r--r--drivers/usb/host/xhci.c14
-rw-r--r--drivers/usb/host/xhci.h2
-rw-r--r--drivers/usb/musb/musb_core.c1
-rw-r--r--drivers/usb/musb/musb_core.h17
-rw-r--r--drivers/usb/musb/omap2430.c1
-rw-r--r--drivers/usb/serial/sierra.c3
-rw-r--r--drivers/usb/serial/usb_wwan.c15
-rw-r--r--drivers/usb/serial/visor.c12
-rw-r--r--drivers/video/backlight/ltv350qv.c9
-rw-r--r--drivers/watchdog/cpwd.c2
-rw-r--r--drivers/watchdog/hpwdt.c4
-rw-r--r--drivers/watchdog/sbc_fitpc2_wdt.c7
-rw-r--r--drivers/watchdog/sch311x_wdt.c2
-rw-r--r--drivers/watchdog/w83697ug_wdt.c2
-rw-r--r--drivers/xen/balloon.c16
-rw-r--r--drivers/xen/events.c342
-rw-r--r--drivers/xen/manage.c153
-rw-r--r--drivers/xen/platform-pci.c3
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c52
-rw-r--r--fs/block_dev.c19
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c44
-rw-r--r--fs/btrfs/extent_io.c165
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c135
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c13
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/ceph/dir.c30
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c121
-rw-r--r--fs/eventpoll.c95
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c55
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/main.c9
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/inode.c31
-rw-r--r--fs/internal.h15
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jfs/namei.c5
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c1490
-rw-r--r--fs/namespace.c18
-rw-r--r--fs/nfs/inode.c7
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c131
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/refcounttree.c9
-rw-r--r--fs/ocfs2/super.c28
-rw-r--r--fs/open.c134
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c18
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--include/asm-generic/cputime.h3
-rw-r--r--include/asm-generic/fcntl.h4
-rw-r--r--include/asm-generic/futex.h7
-rw-r--r--include/asm-generic/pgtable.h2
-rw-r--r--include/asm-generic/sections.h1
-rw-r--r--include/asm-generic/unistd.h6
-rw-r--r--include/asm-generic/vmlinux.lds.h6
-rw-r--r--include/drm/drmP.h2
-rw-r--r--include/keys/rxrpc-type.h1
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/blktrace_api.h1
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/cgroup.h4
-rw-r--r--include/linux/cgroup_subsys.h4
-rw-r--r--include/linux/dcbnl.h2
-rw-r--r--include/linux/debugobjects.h5
-rw-r--r--include/linux/exportfs.h9
-rw-r--r--include/linux/fcntl.h1
-rw-r--r--include/linux/file.h2
-rw-r--r--include/linux/fs.h23
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/ftrace_event.h2
-rw-r--r--include/linux/gfp.h11
-rw-r--r--include/linux/hrtimer.h24
-rw-r--r--include/linux/interrupt.h85
-rw-r--r--include/linux/irq.h368
-rw-r--r--include/linux/irqdesc.h78
-rw-r--r--include/linux/jiffies.h1
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/mfd/wm8994/core.h1
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/namei.h7
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/nfs_fs_sb.h10
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/perf_event.h50
-rw-r--r--include/linux/plist.h47
-rw-r--r--include/linux/pm.h2
-rw-r--r--include/linux/pm_wakeup.h25
-rw-r--r--include/linux/posix-clock.h150
-rw-r--r--include/linux/posix-timers.h44
-rw-r--r--include/linux/ptrace.h3
-rw-r--r--include/linux/ring_buffer.h2
-rw-r--r--include/linux/rio_regs.h4
-rw-r--r--include/linux/rtc.h5
-rw-r--r--include/linux/rwlock_types.h8
-rw-r--r--include/linux/rwsem-spinlock.h31
-rw-r--r--include/linux/rwsem.h53
-rw-r--r--include/linux/sched.h22
-rw-r--r--include/linux/security.h9
-rw-r--r--include/linux/spinlock_types.h8
-rw-r--r--include/linux/sunrpc/sched.h1
-rw-r--r--include/linux/syscalls.h20
-rw-r--r--include/linux/sysctl.h14
-rw-r--r--include/linux/thermal.h8
-rw-r--r--include/linux/thread_info.h3
-rw-r--r--include/linux/time.h14
-rw-r--r--include/linux/timex.h3
-rw-r--r--include/net/ipv6.h12
-rw-r--r--include/net/netfilter/nf_tproxy_core.h12
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/pcmcia/ds.h1
-rw-r--r--include/sound/wm8903.h10
-rw-r--r--include/target/target_core_transport.h2
-rw-r--r--include/trace/events/block.h6
-rw-r--r--include/trace/events/mce.h8
-rw-r--r--include/trace/events/module.h5
-rw-r--r--include/trace/events/skb.h4
-rw-r--r--include/xen/events.h8
-rw-r--r--include/xen/interface/io/blkif.h37
-rw-r--r--include/xen/interface/xen.h4
-rw-r--r--include/xen/xen-ops.h6
-rw-r--r--init/Kconfig22
-rw-r--r--kernel/audit_watch.c85
-rw-r--r--kernel/cgroup.c54
-rw-r--r--kernel/compat.c136
-rw-r--r--kernel/cpuset.c7
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/futex.c147
-rw-r--r--kernel/hrtimer.c90
-rw-r--r--kernel/irq/Kconfig39
-rw-r--r--kernel/irq/autoprobe.c54
-rw-r--r--kernel/irq/chip.c483
-rw-r--r--kernel/irq/compat.h72
-rw-r--r--kernel/irq/debug.h40
-rw-r--r--kernel/irq/handle.c144
-rw-r--r--kernel/irq/internals.h173
-rw-r--r--kernel/irq/irqdesc.c79
-rw-r--r--kernel/irq/manage.c604
-rw-r--r--kernel/irq/migration.c38
-rw-r--r--kernel/irq/pm.c30
-rw-r--r--kernel/irq/proc.c70
-rw-r--r--kernel/irq/resend.c19
-rw-r--r--kernel/irq/settings.h138
-rw-r--r--kernel/irq/spurious.c163
-rw-r--r--kernel/perf_event.c1023
-rw-r--r--kernel/posix-cpu-timers.c110
-rw-r--r--kernel/posix-timers.c342
-rw-r--r--kernel/ptrace.c6
-rw-r--r--kernel/rtmutex-debug.c1
-rw-r--r--kernel/rtmutex-tester.c40
-rw-r--r--kernel/rtmutex.c318
-rw-r--r--kernel/rtmutex_common.h16
-rw-r--r--kernel/sched.c339
-rw-r--r--kernel/sched_autogroup.c15
-rw-r--r--kernel/sched_autogroup.h5
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/sched_fair.c397
-rw-r--r--kernel/sched_idletask.c26
-rw-r--r--kernel/sched_rt.c33
-rw-r--r--kernel/sched_stoptask.c7
-rw-r--r--kernel/softirq.c24
-rw-r--r--kernel/sys_ni.c5
-rw-r--r--kernel/sysctl.c26
-rw-r--r--kernel/sysctl_binary.c19
-rw-r--r--kernel/time.c35
-rw-r--r--kernel/time/Makefile3
-rw-r--r--kernel/time/clockevents.c1
-rw-r--r--kernel/time/jiffies.c20
-rw-r--r--kernel/time/ntp.c13
-rw-r--r--kernel/time/posix-clock.c451
-rw-r--r--kernel/time/tick-broadcast.c11
-rw-r--r--kernel/time/tick-common.c7
-rw-r--r--kernel/time/tick-internal.h12
-rw-r--r--kernel/time/tick-oneshot.c1
-rw-r--r--kernel/time/tick-sched.c1
-rw-r--r--kernel/time/timekeeping.c141
-rw-r--r--kernel/timer.c42
-rw-r--r--kernel/trace/blktrace.c16
-rw-r--r--kernel/trace/ftrace.c52
-rw-r--r--kernel/trace/ring_buffer.c24
-rw-r--r--kernel/trace/trace.c38
-rw-r--r--kernel/trace/trace.h41
-rw-r--r--kernel/trace/trace_entries.h6
-rw-r--r--kernel/trace/trace_events.c2
-rw-r--r--kernel/trace/trace_events_filter.c885
-rw-r--r--kernel/trace/trace_kprobe.c111
-rw-r--r--kernel/trace/trace_output.c36
-rw-r--r--kernel/trace/trace_sched_switch.c48
-rw-r--r--kernel/trace/trace_syscalls.c42
-rw-r--r--kernel/workqueue.c6
-rw-r--r--lib/debugobjects.c9
-rw-r--r--lib/nlattr.c2
-rw-r--r--lib/plist.c135
-rw-r--r--lib/rwsem.c10
-rw-r--r--lib/swiotlb.c6
-rw-r--r--mm/Makefile8
-rw-r--r--mm/bootmem.c180
-rw-r--r--mm/huge_memory.c34
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mempolicy.c16
-rw-r--r--mm/migrate.c6
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nobootmem.c435
-rw-r--r--mm/page_alloc.c76
-rw-r--r--mm/rmap.c54
-rw-r--r--mm/shmem.c4
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/truncate.c2
-rw-r--r--mm/vmscan.c32
-rw-r--r--net/Makefile4
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bridge/Kconfig1
-rw-r--r--net/bridge/br_multicast.c23
-rw-r--r--net/ceph/messenger.c71
-rw-r--r--net/ceph/pagevec.c18
-rw-r--r--net/core/dev.c12
-rw-r--r--net/core/dev_addr_lists.c2
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/scm.c2
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/dccp/input.c7
-rw-r--r--net/dns_resolver/dns_key.c20
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c2
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/mac80211/iface.c1
-rw-r--r--net/mac80211/mlme.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nf_log.c4
-rw-r--r--net/netfilter/nf_tproxy_core.c27
-rw-r--r--net/netfilter/xt_TPROXY.c22
-rw-r--r--net/netfilter/xt_socket.c13
-rw-r--r--net/netlink/af_netlink.c18
-rw-r--r--net/rds/ib_send.c5
-rw-r--r--net/rds/loop.c11
-rw-r--r--net/rxrpc/ar-input.c1
-rw-r--r--net/rxrpc/ar-key.c8
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sctp/sm_make_chunk.c10
-rw-r--r--net/sunrpc/sched.c75
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/unix/af_unix.c19
-rw-r--r--net/unix/garbage.c2
-rw-r--r--net/wireless/wext-compat.c4
-rw-r--r--scripts/basic/fixdep.c19
-rwxr-xr-xscripts/checkpatch.pl5
-rw-r--r--scripts/kconfig/streamline_config.pl2
-rw-r--r--scripts/mod/sumversion.c19
-rw-r--r--scripts/recordmcount.c3
-rwxr-xr-xscripts/recordmcount.pl1
-rw-r--r--scripts/rt-tester/rt-tester.py2
-rw-r--r--scripts/rt-tester/t2-l1-2rt-sameprio.tst5
-rw-r--r--scripts/rt-tester/t2-l1-pi.tst5
-rw-r--r--scripts/rt-tester/t2-l1-signal.tst5
-rw-r--r--scripts/rt-tester/t2-l2-2rt-deadlock.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-1rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-2rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-3rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-signal.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-steal.tst5
-rw-r--r--scripts/rt-tester/t3-l2-pi.tst5
-rw-r--r--scripts/rt-tester/t4-l2-pi-deboost.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost.tst5
-rw-r--r--security/commoncap.c2
-rw-r--r--security/security.c2
-rw-r--r--sound/core/jack.c1
-rw-r--r--sound/pci/hda/patch_cirrus.c2
-rw-r--r--sound/pci/hda/patch_conexant.c5
-rw-r--r--sound/pci/hda/patch_hdmi.c5
-rw-r--r--sound/pci/hda/patch_realtek.c9
-rw-r--r--sound/pci/hda/patch_sigmatel.c15
-rw-r--r--sound/pci/hda/patch_via.c2
-rw-r--r--sound/soc/codecs/cx20442.c2
-rw-r--r--sound/soc/codecs/wm8903.c2
-rw-r--r--sound/soc/codecs/wm8903.h2
-rw-r--r--sound/soc/codecs/wm8978.c14
-rw-r--r--sound/soc/codecs/wm8994.c249
-rw-r--r--sound/soc/codecs/wm9081.c5
-rw-r--r--sound/soc/codecs/wm_hubs.c3
-rw-r--r--sound/soc/imx/eukrea-tlv320.c2
-rw-r--r--sound/soc/omap/am3517evm.c2
-rw-r--r--sound/soc/pxa/e740_wm9705.c4
-rw-r--r--sound/soc/pxa/e750_wm9705.c4
-rw-r--r--sound/soc/pxa/e800_wm9712.c4
-rw-r--r--sound/soc/pxa/em-x270.c4
-rw-r--r--sound/soc/pxa/mioa701_wm9713.c4
-rw-r--r--sound/soc/pxa/palm27x.c4
-rw-r--r--sound/soc/pxa/tosa.c4
-rw-r--r--sound/soc/pxa/zylonite.c4
-rw-r--r--sound/soc/soc-dapm.c25
-rw-r--r--sound/usb/card.c4
-rw-r--r--sound/usb/pcm.c7
-rw-r--r--sound/usb/usbaudio.h1
-rw-r--r--tools/perf/.gitignore1
-rw-r--r--tools/perf/Documentation/Makefile19
-rw-r--r--tools/perf/Documentation/perf-list.txt23
-rw-r--r--tools/perf/Documentation/perf-lock.txt12
-rw-r--r--tools/perf/Documentation/perf-probe.txt26
-rw-r--r--tools/perf/Documentation/perf-record.txt11
-rw-r--r--tools/perf/Documentation/perf-stat.txt11
-rw-r--r--tools/perf/Makefile649
-rw-r--r--tools/perf/bench/sched-pipe.c2
-rw-r--r--tools/perf/builtin-annotate.c351
-rw-r--r--tools/perf/builtin-diff.c16
-rw-r--r--tools/perf/builtin-inject.c82
-rw-r--r--tools/perf/builtin-kmem.c10
-rw-r--r--tools/perf/builtin-list.c43
-rw-r--r--tools/perf/builtin-lock.c8
-rw-r--r--tools/perf/builtin-probe.c70
-rw-r--r--tools/perf/builtin-record.c450
-rw-r--r--tools/perf/builtin-report.c225
-rw-r--r--tools/perf/builtin-sched.c27
-rw-r--r--tools/perf/builtin-script.c17
-rw-r--r--tools/perf/builtin-stat.c118
-rw-r--r--tools/perf/builtin-test.c184
-rw-r--r--tools/perf/builtin-timechart.c19
-rw-r--r--tools/perf/builtin-top.c1029
-rw-r--r--tools/perf/perf.h26
-rwxr-xr-xtools/perf/python/twatch.py41
-rw-r--r--tools/perf/util/annotate.c605
-rw-r--r--tools/perf/util/annotate.h103
-rw-r--r--tools/perf/util/build-id.c21
-rw-r--r--tools/perf/util/cache.h7
-rw-r--r--tools/perf/util/callchain.c227
-rw-r--r--tools/perf/util/callchain.h76
-rw-r--r--tools/perf/util/cgroup.c178
-rw-r--r--tools/perf/util/cgroup.h17
-rw-r--r--tools/perf/util/cpumap.c5
-rw-r--r--tools/perf/util/cpumap.h2
-rw-r--r--tools/perf/util/debug.c2
-rw-r--r--tools/perf/util/debug.h2
-rw-r--r--tools/perf/util/event.c374
-rw-r--r--tools/perf/util/event.h78
-rw-r--r--tools/perf/util/evlist.c394
-rw-r--r--tools/perf/util/evlist.h68
-rw-r--r--tools/perf/util/evsel.c234
-rw-r--r--tools/perf/util/evsel.h47
-rw-r--r--tools/perf/util/exec_cmd.c19
-rw-r--r--tools/perf/util/header.c546
-rw-r--r--tools/perf/util/header.h95
-rw-r--r--tools/perf/util/hist.c257
-rw-r--r--tools/perf/util/hist.h60
-rw-r--r--tools/perf/util/include/linux/list.h1
-rw-r--r--tools/perf/util/parse-events.c175
-rw-r--r--tools/perf/util/parse-events.h12
-rw-r--r--tools/perf/util/probe-event.c159
-rw-r--r--tools/perf/util/probe-event.h4
-rw-r--r--tools/perf/util/probe-finder.c533
-rw-r--r--tools/perf/util/python.c896
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c3
-rw-r--r--tools/perf/util/session.c266
-rw-r--r--tools/perf/util/session.h41
-rw-r--r--tools/perf/util/setup.py19
-rw-r--r--tools/perf/util/strfilter.c199
-rw-r--r--tools/perf/util/strfilter.h48
-rw-r--r--tools/perf/util/svghelper.c6
-rw-r--r--tools/perf/util/symbol.c7
-rw-r--r--tools/perf/util/symbol.h1
-rw-r--r--tools/perf/util/thread.c55
-rw-r--r--tools/perf/util/thread.h14
-rw-r--r--tools/perf/util/thread_map.c64
-rw-r--r--tools/perf/util/thread_map.h15
-rw-r--r--tools/perf/util/top.c238
-rw-r--r--tools/perf/util/top.h66
-rw-r--r--tools/perf/util/trace-event-parse.c2
-rw-r--r--tools/perf/util/ui/browser.c25
-rw-r--r--tools/perf/util/ui/browser.h3
-rw-r--r--tools/perf/util/ui/browsers/annotate.c178
-rw-r--r--tools/perf/util/ui/browsers/hists.c197
-rw-r--r--tools/perf/util/ui/browsers/map.c2
-rw-r--r--tools/perf/util/ui/browsers/top.c213
-rw-r--r--tools/perf/util/ui/helpline.c5
-rw-r--r--tools/perf/util/ui/libslang.h6
-rw-r--r--tools/perf/util/ui/setup.c8
-rw-r--r--tools/perf/util/ui/ui.h8
-rw-r--r--tools/perf/util/ui/util.c7
-rw-r--r--tools/perf/util/util.h26
-rwxr-xr-xtools/testing/ktest/ktest.pl2
1000 files changed, 24168 insertions, 15135 deletions
diff --git a/.gitignore b/.gitignore
index 8faa6c02b39e..5d56a3fd0de6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ modules.builtin
28*.gz 28*.gz
29*.bz2 29*.bz2
30*.lzma 30*.lzma
31*.xz
31*.lzo 32*.lzo
32*.patch 33*.patch
33*.gcno 34*.gcno
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f4a04c0c7edc..738c6fda3fb0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2444,6 +2444,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2444 <deci-seconds>: poll all this frequency 2444 <deci-seconds>: poll all this frequency
2445 0: no polling (default) 2445 0: no polling (default)
2446 2446
2447 threadirqs [KNL]
2448 Force threading of all interrupt handlers except those
2449 marked explicitely IRQF_NO_THREAD.
2450
2447 topology= [S390] 2451 topology= [S390]
2448 Format: {off | on} 2452 Format: {off | on}
2449 Specify if the kernel should make use of the cpu 2453 Specify if the kernel should make use of the cpu
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index fe5c099b8fc8..4edd78dfb362 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -40,8 +40,6 @@ decnet.txt
40 - info on using the DECnet networking layer in Linux. 40 - info on using the DECnet networking layer in Linux.
41depca.txt 41depca.txt
42 - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver 42 - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver
43dgrs.txt
44 - the Digi International RightSwitch SE-X Ethernet driver
45dmfe.txt 43dmfe.txt
46 - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver. 44 - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver.
47e100.txt 45e100.txt
@@ -50,8 +48,6 @@ e1000.txt
50 - info on Intel's E1000 line of gigabit ethernet boards 48 - info on Intel's E1000 line of gigabit ethernet boards
51eql.txt 49eql.txt
52 - serial IP load balancing 50 - serial IP load balancing
53ethertap.txt
54 - the Ethertap user space packet reception and transmission driver
55ewrk3.txt 51ewrk3.txt
56 - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver 52 - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver
57filter.txt 53filter.txt
@@ -104,8 +100,6 @@ tuntap.txt
104 - TUN/TAP device driver, allowing user space Rx/Tx of packets. 100 - TUN/TAP device driver, allowing user space Rx/Tx of packets.
105vortex.txt 101vortex.txt
106 - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards. 102 - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
107wavelan.txt
108 - AT&T GIS (nee NCR) WaveLAN card: An Ethernet-like radio transceiver
109x25.txt 103x25.txt
110 - general info on X.25 development. 104 - general info on X.25 development.
111x25-iface.txt 105x25-iface.txt
diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt
index aefd1e681804..04ca06325b08 100644
--- a/Documentation/networking/dns_resolver.txt
+++ b/Documentation/networking/dns_resolver.txt
@@ -61,7 +61,6 @@ before the more general line given above as the first match is the one taken.
61 create dns_resolver foo:* * /usr/sbin/dns.foo %k 61 create dns_resolver foo:* * /usr/sbin/dns.foo %k
62 62
63 63
64
65===== 64=====
66USAGE 65USAGE
67===== 66=====
@@ -104,6 +103,14 @@ implemented in the module can be called after doing:
104 returned also. 103 returned also.
105 104
106 105
106===============================
107READING DNS KEYS FROM USERSPACE
108===============================
109
110Keys of dns_resolver type can be read from userspace using keyctl_read() or
111"keyctl read/print/pipe".
112
113
107========= 114=========
108MECHANISM 115MECHANISM
109========= 116=========
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 9104c1062084..250160469d83 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -178,38 +178,29 @@ RTC class framework, but can't be supported by the older driver.
178 setting the longer alarm time and enabling its IRQ using a single 178 setting the longer alarm time and enabling its IRQ using a single
179 request (using the same model as EFI firmware). 179 request (using the same model as EFI firmware).
180 180
181 * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, it probably 181 * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, the RTC framework
182 also offers update IRQs whenever the "seconds" counter changes. 182 will emulate this mechanism.
183 If needed, the RTC framework can emulate this mechanism.
184 183
185 * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... another 184 * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... these icotls
186 feature often accessible with an IRQ line is a periodic IRQ, issued 185 are emulated via a kernel hrtimer.
187 at settable frequencies (usually 2^N Hz).
188 186
189In many cases, the RTC alarm can be a system wake event, used to force 187In many cases, the RTC alarm can be a system wake event, used to force
190Linux out of a low power sleep state (or hibernation) back to a fully 188Linux out of a low power sleep state (or hibernation) back to a fully
191operational state. For example, a system could enter a deep power saving 189operational state. For example, a system could enter a deep power saving
192state until it's time to execute some scheduled tasks. 190state until it's time to execute some scheduled tasks.
193 191
194Note that many of these ioctls need not actually be implemented by your 192Note that many of these ioctls are handled by the common rtc-dev interface.
195driver. The common rtc-dev interface handles many of these nicely if your 193Some common examples:
196driver returns ENOIOCTLCMD. Some common examples:
197 194
198 * RTC_RD_TIME, RTC_SET_TIME: the read_time/set_time functions will be 195 * RTC_RD_TIME, RTC_SET_TIME: the read_time/set_time functions will be
199 called with appropriate values. 196 called with appropriate values.
200 197
201 * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: the 198 * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: gets or sets
202 set_alarm/read_alarm functions will be called. 199 the alarm rtc_timer. May call the set_alarm driver function.
203 200
204 * RTC_IRQP_SET, RTC_IRQP_READ: the irq_set_freq function will be called 201 * RTC_IRQP_SET, RTC_IRQP_READ: These are emulated by the generic code.
205 to set the frequency while the framework will handle the read for you
206 since the frequency is stored in the irq_freq member of the rtc_device
207 structure. Your driver needs to initialize the irq_freq member during
208 init. Make sure you check the requested frequency is in range of your
209 hardware in the irq_set_freq function. If it isn't, return -EINVAL. If
210 you cannot actually change the frequency, do not define irq_set_freq.
211 202
212 * RTC_PIE_ON, RTC_PIE_OFF: the irq_set_state function will be called. 203 * RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code.
213 204
214If all else fails, check out the rtc-test.c driver! 205If all else fails, check out the rtc-test.c driver!
215 206
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 178c831b907d..2e3c64b1a6a5 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -86,7 +86,7 @@ to change the variables it has to get an exclusive write lock.
86 86
87The routines look the same as above: 87The routines look the same as above:
88 88
89 rwlock_t xxx_lock = RW_LOCK_UNLOCKED; 89 rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
90 90
91 unsigned long flags; 91 unsigned long flags;
92 92
@@ -196,25 +196,3 @@ appropriate:
196 196
197For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or 197For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
198__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate. 198__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
199
200SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. These interfere
201with lockdep state tracking.
202
203Most of the time, you can simply turn:
204 static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
205into:
206 static DEFINE_SPINLOCK(xxx_lock);
207
208Static structure member variables go from:
209
210 struct foo bar {
211 .lock = SPIN_LOCK_UNLOCKED;
212 };
213
214to:
215
216 struct foo bar {
217 .lock = __SPIN_LOCK_UNLOCKED(bar.lock);
218 };
219
220Declaration of static rw_locks undergo a similar transformation.
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index dc52bd442c92..79fcafc7fd64 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -247,6 +247,13 @@ You need very few things to get the syscalls tracing in an arch.
247- Support the TIF_SYSCALL_TRACEPOINT thread flags. 247- Support the TIF_SYSCALL_TRACEPOINT thread flags.
248- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace 248- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace
249 in the ptrace syscalls tracing path. 249 in the ptrace syscalls tracing path.
250- If the system call table on this arch is more complicated than a simple array
251 of addresses of the system calls, implement an arch_syscall_addr to return
252 the address of a given system call.
253- If the symbol names of the system calls do not match the function names on
254 this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and
255 implement arch_syscall_match_sym_name with the appropriate logic to return
256 true if the function name corresponds with the symbol name.
250- Tag this arch as HAVE_SYSCALL_TRACEPOINTS. 257- Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
251 258
252 259
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 557c1edeccaf..1ebc24cf9a55 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -80,11 +80,11 @@ of ftrace. Here is a list of some of the key files:
80 tracers listed here can be configured by 80 tracers listed here can be configured by
81 echoing their name into current_tracer. 81 echoing their name into current_tracer.
82 82
83 tracing_enabled: 83 tracing_on:
84 84
85 This sets or displays whether the current_tracer 85 This sets or displays whether writing to the trace
86 is activated and tracing or not. Echo 0 into this 86 ring buffer is enabled. Echo 0 into this file to disable
87 file to disable the tracer or 1 to enable it. 87 the tracer or 1 to enable it.
88 88
89 trace: 89 trace:
90 90
@@ -202,10 +202,6 @@ Here is the list of current tracers that may be configured.
202 to draw a graph of function calls similar to C code 202 to draw a graph of function calls similar to C code
203 source. 203 source.
204 204
205 "sched_switch"
206
207 Traces the context switches and wakeups between tasks.
208
209 "irqsoff" 205 "irqsoff"
210 206
211 Traces the areas that disable interrupts and saves 207 Traces the areas that disable interrupts and saves
@@ -273,39 +269,6 @@ format, the function name that was traced "path_put" and the
273parent function that called this function "path_walk". The 269parent function that called this function "path_walk". The
274timestamp is the time at which the function was entered. 270timestamp is the time at which the function was entered.
275 271
276The sched_switch tracer also includes tracing of task wakeups
277and context switches.
278
279 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S
280 ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S
281 ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R
282 events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R
283 kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R
284 ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R
285
286Wake ups are represented by a "+" and the context switches are
287shown as "==>". The format is:
288
289 Context switches:
290
291 Previous task Next Task
292
293 <pid>:<prio>:<state> ==> <pid>:<prio>:<state>
294
295 Wake ups:
296
297 Current task Task waking up
298
299 <pid>:<prio>:<state> + <pid>:<prio>:<state>
300
301The prio is the internal kernel priority, which is the inverse
302of the priority that is usually displayed by user-space tools.
303Zero represents the highest priority (99). Prio 100 starts the
304"nice" priorities with 100 being equal to nice -20 and 139 being
305nice 19. The prio "140" is reserved for the idle task which is
306the lowest priority thread (pid 0).
307
308
309Latency trace format 272Latency trace format
310-------------------- 273--------------------
311 274
@@ -491,78 +454,10 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
491 latencies, as described in "Latency 454 latencies, as described in "Latency
492 trace format". 455 trace format".
493 456
494sched_switch 457 overwrite - This controls what happens when the trace buffer is
495------------ 458 full. If "1" (default), the oldest events are
496 459 discarded and overwritten. If "0", then the newest
497This tracer simply records schedule switches. Here is an example 460 events are discarded.
498of how to use it.
499
500 # echo sched_switch > current_tracer
501 # echo 1 > tracing_enabled
502 # sleep 1
503 # echo 0 > tracing_enabled
504 # cat trace
505
506# tracer: sched_switch
507#
508# TASK-PID CPU# TIMESTAMP FUNCTION
509# | | | | |
510 bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R
511 bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R
512 sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R
513 bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S
514 bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R
515 sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R
516 bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D
517 bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R
518 <idle>-0 [00] 240.132589: 0:140:R + 4:115:S
519 <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R
520 ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R
521 <idle>-0 [00] 240.132598: 0:140:R + 4:115:S
522 <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R
523 ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R
524 sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R
525 [...]
526
527
528As we have discussed previously about this format, the header
529shows the name of the trace and points to the options. The
530"FUNCTION" is a misnomer since here it represents the wake ups
531and context switches.
532
533The sched_switch file only lists the wake ups (represented with
534'+') and context switches ('==>') with the previous task or
535current task first followed by the next task or task waking up.
536The format for both of these is PID:KERNEL-PRIO:TASK-STATE.
537Remember that the KERNEL-PRIO is the inverse of the actual
538priority with zero (0) being the highest priority and the nice
539values starting at 100 (nice -20). Below is a quick chart to map
540the kernel priority to user land priorities.
541
542 Kernel Space User Space
543 ===============================================================
544 0(high) to 98(low) user RT priority 99(high) to 1(low)
545 with SCHED_RR or SCHED_FIFO
546 ---------------------------------------------------------------
547 99 sched_priority is not used in scheduling
548 decisions(it must be specified as 0)
549 ---------------------------------------------------------------
550 100(high) to 139(low) user nice -20(high) to 19(low)
551 ---------------------------------------------------------------
552 140 idle task priority
553 ---------------------------------------------------------------
554
555The task states are:
556
557 R - running : wants to run, may not actually be running
558 S - sleep : process is waiting to be woken up (handles signals)
559 D - disk sleep (uninterruptible sleep) : process must be woken up
560 (ignores signals)
561 T - stopped : process suspended
562 t - traced : process is being traced (with something like gdb)
563 Z - zombie : process waiting to be cleaned up
564 X - unknown
565
566 461
567ftrace_enabled 462ftrace_enabled
568-------------- 463--------------
@@ -607,10 +502,10 @@ an example:
607 # echo irqsoff > current_tracer 502 # echo irqsoff > current_tracer
608 # echo latency-format > trace_options 503 # echo latency-format > trace_options
609 # echo 0 > tracing_max_latency 504 # echo 0 > tracing_max_latency
610 # echo 1 > tracing_enabled 505 # echo 1 > tracing_on
611 # ls -ltr 506 # ls -ltr
612 [...] 507 [...]
613 # echo 0 > tracing_enabled 508 # echo 0 > tracing_on
614 # cat trace 509 # cat trace
615# tracer: irqsoff 510# tracer: irqsoff
616# 511#
@@ -715,10 +610,10 @@ is much like the irqsoff tracer.
715 # echo preemptoff > current_tracer 610 # echo preemptoff > current_tracer
716 # echo latency-format > trace_options 611 # echo latency-format > trace_options
717 # echo 0 > tracing_max_latency 612 # echo 0 > tracing_max_latency
718 # echo 1 > tracing_enabled 613 # echo 1 > tracing_on
719 # ls -ltr 614 # ls -ltr
720 [...] 615 [...]
721 # echo 0 > tracing_enabled 616 # echo 0 > tracing_on
722 # cat trace 617 # cat trace
723# tracer: preemptoff 618# tracer: preemptoff
724# 619#
@@ -863,10 +758,10 @@ tracers.
863 # echo preemptirqsoff > current_tracer 758 # echo preemptirqsoff > current_tracer
864 # echo latency-format > trace_options 759 # echo latency-format > trace_options
865 # echo 0 > tracing_max_latency 760 # echo 0 > tracing_max_latency
866 # echo 1 > tracing_enabled 761 # echo 1 > tracing_on
867 # ls -ltr 762 # ls -ltr
868 [...] 763 [...]
869 # echo 0 > tracing_enabled 764 # echo 0 > tracing_on
870 # cat trace 765 # cat trace
871# tracer: preemptirqsoff 766# tracer: preemptirqsoff
872# 767#
@@ -1026,9 +921,9 @@ Instead of performing an 'ls', we will run 'sleep 1' under
1026 # echo wakeup > current_tracer 921 # echo wakeup > current_tracer
1027 # echo latency-format > trace_options 922 # echo latency-format > trace_options
1028 # echo 0 > tracing_max_latency 923 # echo 0 > tracing_max_latency
1029 # echo 1 > tracing_enabled 924 # echo 1 > tracing_on
1030 # chrt -f 5 sleep 1 925 # chrt -f 5 sleep 1
1031 # echo 0 > tracing_enabled 926 # echo 0 > tracing_on
1032 # cat trace 927 # cat trace
1033# tracer: wakeup 928# tracer: wakeup
1034# 929#
@@ -1140,9 +1035,9 @@ ftrace_enabled is set; otherwise this tracer is a nop.
1140 1035
1141 # sysctl kernel.ftrace_enabled=1 1036 # sysctl kernel.ftrace_enabled=1
1142 # echo function > current_tracer 1037 # echo function > current_tracer
1143 # echo 1 > tracing_enabled 1038 # echo 1 > tracing_on
1144 # usleep 1 1039 # usleep 1
1145 # echo 0 > tracing_enabled 1040 # echo 0 > tracing_on
1146 # cat trace 1041 # cat trace
1147# tracer: function 1042# tracer: function
1148# 1043#
@@ -1180,7 +1075,7 @@ int trace_fd;
1180[...] 1075[...]
1181int main(int argc, char *argv[]) { 1076int main(int argc, char *argv[]) {
1182 [...] 1077 [...]
1183 trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY); 1078 trace_fd = open(tracing_file("tracing_on"), O_WRONLY);
1184 [...] 1079 [...]
1185 if (condition_hit()) { 1080 if (condition_hit()) {
1186 write(trace_fd, "0", 1); 1081 write(trace_fd, "0", 1);
@@ -1631,9 +1526,9 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt:
1631 # echo sys_nanosleep hrtimer_interrupt \ 1526 # echo sys_nanosleep hrtimer_interrupt \
1632 > set_ftrace_filter 1527 > set_ftrace_filter
1633 # echo function > current_tracer 1528 # echo function > current_tracer
1634 # echo 1 > tracing_enabled 1529 # echo 1 > tracing_on
1635 # usleep 1 1530 # usleep 1
1636 # echo 0 > tracing_enabled 1531 # echo 0 > tracing_on
1637 # cat trace 1532 # cat trace
1638# tracer: ftrace 1533# tracer: ftrace
1639# 1534#
@@ -1879,9 +1774,9 @@ different. The trace is live.
1879 # echo function > current_tracer 1774 # echo function > current_tracer
1880 # cat trace_pipe > /tmp/trace.out & 1775 # cat trace_pipe > /tmp/trace.out &
1881[1] 4153 1776[1] 4153
1882 # echo 1 > tracing_enabled 1777 # echo 1 > tracing_on
1883 # usleep 1 1778 # usleep 1
1884 # echo 0 > tracing_enabled 1779 # echo 0 > tracing_on
1885 # cat trace 1780 # cat trace
1886# tracer: function 1781# tracer: function
1887# 1782#
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 5f77d94598dd..6d27ab8d6e9f 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,11 +42,25 @@ Synopsis of kprobe_events
42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) 42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG. 43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types 44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
45 (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported. 45 (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
46 are supported.
46 47
47 (*) only for return probe. 48 (*) only for return probe.
48 (**) this is useful for fetching a field of data structures. 49 (**) this is useful for fetching a field of data structures.
49 50
51Types
52-----
53Several types are supported for fetch-args. Kprobe tracer will access memory
54by given type. Prefix 's' and 'u' means those types are signed and unsigned
55respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
56String type is a special type, which fetches a "null-terminated" string from
57kernel space. This means it will fail and store NULL if the string container
58has been paged out.
59Bitfield is another special type, which takes 3 parameters, bit-width, bit-
60offset, and container-size (usually 32). The syntax is;
61
62 b<bit-width>@<bit-offset>/<container-size>
63
50 64
51Per-Probe Event Filtering 65Per-Probe Event Filtering
52------------------------- 66-------------------------
diff --git a/MAINTAINERS b/MAINTAINERS
index 6f99e1260db8..f1bc3dc6b369 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1010,6 +1010,15 @@ L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
1010S: Maintained 1010S: Maintained
1011F: arch/arm/mach-s5p*/ 1011F: arch/arm/mach-s5p*/
1012 1012
1013ARM/SAMSUNG MOBILE MACHINE SUPPORT
1014M: Kyungmin Park <kyungmin.park@samsung.com>
1015L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
1016S: Maintained
1017F: arch/arm/mach-s5pv210/mach-aquila.c
1018F: arch/arm/mach-s5pv210/mach-goni.c
1019F: arch/arm/mach-exynos4/mach-universal_c210.c
1020F: arch/arm/mach-exynos4/mach-nuri.c
1021
1013ARM/SAMSUNG S5P SERIES FIMC SUPPORT 1022ARM/SAMSUNG S5P SERIES FIMC SUPPORT
1014M: Kyungmin Park <kyungmin.park@samsung.com> 1023M: Kyungmin Park <kyungmin.park@samsung.com>
1015M: Sylwester Nawrocki <s.nawrocki@samsung.com> 1024M: Sylwester Nawrocki <s.nawrocki@samsung.com>
@@ -1467,6 +1476,7 @@ F: include/net/bluetooth/
1467 1476
1468BONDING DRIVER 1477BONDING DRIVER
1469M: Jay Vosburgh <fubar@us.ibm.com> 1478M: Jay Vosburgh <fubar@us.ibm.com>
1479M: Andy Gospodarek <andy@greyhouse.net>
1470L: netdev@vger.kernel.org 1480L: netdev@vger.kernel.org
1471W: http://sourceforge.net/projects/bonding/ 1481W: http://sourceforge.net/projects/bonding/
1472S: Supported 1482S: Supported
@@ -1692,6 +1702,13 @@ M: Andy Whitcroft <apw@canonical.com>
1692S: Supported 1702S: Supported
1693F: scripts/checkpatch.pl 1703F: scripts/checkpatch.pl
1694 1704
1705CHINESE DOCUMENTATION
1706M: Harry Wei <harryxiyou@gmail.com>
1707L: xiyoulinuxkernelgroup@googlegroups.com
1708L: linux-kernel@zh-kernel.org (moderated for non-subscribers)
1709S: Maintained
1710F: Documentation/zh_CN/
1711
1695CISCO VIC ETHERNET NIC DRIVER 1712CISCO VIC ETHERNET NIC DRIVER
1696M: Vasanthy Kolluri <vkolluri@cisco.com> 1713M: Vasanthy Kolluri <vkolluri@cisco.com>
1697M: Roopa Prabhu <roprabhu@cisco.com> 1714M: Roopa Prabhu <roprabhu@cisco.com>
@@ -2026,7 +2043,7 @@ F: Documentation/scsi/dc395x.txt
2026F: drivers/scsi/dc395x.* 2043F: drivers/scsi/dc395x.*
2027 2044
2028DCCP PROTOCOL 2045DCCP PROTOCOL
2029M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> 2046M: Gerrit Renker <gerrit@erg.abdn.ac.uk>
2030L: dccp@vger.kernel.org 2047L: dccp@vger.kernel.org
2031W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp 2048W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp
2032S: Maintained 2049S: Maintained
@@ -3512,7 +3529,7 @@ F: drivers/hwmon/jc42.c
3512F: Documentation/hwmon/jc42 3529F: Documentation/hwmon/jc42
3513 3530
3514JFS FILESYSTEM 3531JFS FILESYSTEM
3515M: Dave Kleikamp <shaggy@linux.vnet.ibm.com> 3532M: Dave Kleikamp <shaggy@kernel.org>
3516L: jfs-discussion@lists.sourceforge.net 3533L: jfs-discussion@lists.sourceforge.net
3517W: http://jfs.sourceforge.net/ 3534W: http://jfs.sourceforge.net/
3518T: git git://git.kernel.org/pub/scm/linux/kernel/git/shaggy/jfs-2.6.git 3535T: git git://git.kernel.org/pub/scm/linux/kernel/git/shaggy/jfs-2.6.git
@@ -4275,10 +4292,7 @@ S: Maintained
4275F: net/sched/sch_netem.c 4292F: net/sched/sch_netem.c
4276 4293
4277NETERION 10GbE DRIVERS (s2io/vxge) 4294NETERION 10GbE DRIVERS (s2io/vxge)
4278M: Ramkrishna Vepa <ramkrishna.vepa@exar.com> 4295M: Jon Mason <jdmason@kudzu.us>
4279M: Sivakumar Subramani <sivakumar.subramani@exar.com>
4280M: Sreenivasa Honnur <sreenivasa.honnur@exar.com>
4281M: Jon Mason <jon.mason@exar.com>
4282L: netdev@vger.kernel.org 4296L: netdev@vger.kernel.org
4283W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous 4297W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous
4284W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous 4298W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous
@@ -5164,6 +5178,7 @@ F: drivers/char/random.c
5164 5178
5165RAPIDIO SUBSYSTEM 5179RAPIDIO SUBSYSTEM
5166M: Matt Porter <mporter@kernel.crashing.org> 5180M: Matt Porter <mporter@kernel.crashing.org>
5181M: Alexandre Bounine <alexandre.bounine@idt.com>
5167S: Maintained 5182S: Maintained
5168F: drivers/rapidio/ 5183F: drivers/rapidio/
5169 5184
@@ -5266,7 +5281,7 @@ S: Maintained
5266F: drivers/net/wireless/rtl818x/rtl8180/ 5281F: drivers/net/wireless/rtl818x/rtl8180/
5267 5282
5268RTL8187 WIRELESS DRIVER 5283RTL8187 WIRELESS DRIVER
5269M: Herton Ronaldo Krzesinski <herton@mandriva.com.br> 5284M: Herton Ronaldo Krzesinski <herton@canonical.com>
5270M: Hin-Tak Leung <htl10@users.sourceforge.net> 5285M: Hin-Tak Leung <htl10@users.sourceforge.net>
5271M: Larry Finger <Larry.Finger@lwfinger.net> 5286M: Larry Finger <Larry.Finger@lwfinger.net>
5272L: linux-wireless@vger.kernel.org 5287L: linux-wireless@vger.kernel.org
@@ -6104,7 +6119,7 @@ S: Maintained
6104F: security/tomoyo/ 6119F: security/tomoyo/
6105 6120
6106TOPSTAR LAPTOP EXTRAS DRIVER 6121TOPSTAR LAPTOP EXTRAS DRIVER
6107M: Herton Ronaldo Krzesinski <herton@mandriva.com.br> 6122M: Herton Ronaldo Krzesinski <herton@canonical.com>
6108L: platform-driver-x86@vger.kernel.org 6123L: platform-driver-x86@vger.kernel.org
6109S: Maintained 6124S: Maintained
6110F: drivers/platform/x86/topstar-laptop.c 6125F: drivers/platform/x86/topstar-laptop.c
diff --git a/Makefile b/Makefile
index 26d7d824db51..d6592b63c8cb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 38 3SUBLEVEL = 38
4EXTRAVERSION = -rc6 4EXTRAVERSION =
5NAME = Flesh-Eating Bats with Fangs 5NAME = Flesh-Eating Bats with Fangs
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 47f63d480141..cc31bec2e316 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -11,6 +11,7 @@ config ALPHA
11 select HAVE_GENERIC_HARDIRQS 11 select HAVE_GENERIC_HARDIRQS
12 select GENERIC_IRQ_PROBE 12 select GENERIC_IRQ_PROBE
13 select AUTO_IRQ_AFFINITY if SMP 13 select AUTO_IRQ_AFFINITY if SMP
14 select GENERIC_HARDIRQS_NO_DEPRECATED
14 help 15 help
15 The Alpha is a 64-bit general-purpose processor designed and 16 The Alpha is a 64-bit general-purpose processor designed and
16 marketed by the Digital Equipment Corporation of blessed memory, 17 marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index 945de222ab91..e8a761aee088 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -29,7 +29,7 @@
29 : "r" (uaddr), "r"(oparg) \ 29 : "r" (uaddr), "r"(oparg) \
30 : "memory") 30 : "memory")
31 31
32static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 32static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
33{ 33{
34 int op = (encoded_op >> 28) & 7; 34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15; 35 int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg; 40 oparg = 1 << oparg;
41 41
42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT; 43 return -EFAULT;
44 44
45 pagefault_disable(); 45 pagefault_disable();
@@ -81,21 +81,23 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
81} 81}
82 82
83static inline int 83static inline int
84futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 84futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
85 u32 oldval, u32 newval)
85{ 86{
86 int prev, cmp; 87 int ret = 0, cmp;
88 u32 prev;
87 89
88 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 90 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
89 return -EFAULT; 91 return -EFAULT;
90 92
91 __asm__ __volatile__ ( 93 __asm__ __volatile__ (
92 __ASM_SMP_MB 94 __ASM_SMP_MB
93 "1: ldl_l %0,0(%2)\n" 95 "1: ldl_l %1,0(%3)\n"
94 " cmpeq %0,%3,%1\n" 96 " cmpeq %1,%4,%2\n"
95 " beq %1,3f\n" 97 " beq %2,3f\n"
96 " mov %4,%1\n" 98 " mov %5,%2\n"
97 "2: stl_c %1,0(%2)\n" 99 "2: stl_c %2,0(%3)\n"
98 " beq %1,4f\n" 100 " beq %2,4f\n"
99 "3: .subsection 2\n" 101 "3: .subsection 2\n"
100 "4: br 1b\n" 102 "4: br 1b\n"
101 " .previous\n" 103 " .previous\n"
@@ -105,11 +107,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
105 " .long 2b-.\n" 107 " .long 2b-.\n"
106 " lda $31,3b-2b(%0)\n" 108 " lda $31,3b-2b(%0)\n"
107 " .previous\n" 109 " .previous\n"
108 : "=&r"(prev), "=&r"(cmp) 110 : "+r"(ret), "=&r"(prev), "=&r"(cmp)
109 : "r"(uaddr), "r"((long)oldval), "r"(newval) 111 : "r"(uaddr), "r"((long)oldval), "r"(newval)
110 : "memory"); 112 : "memory");
111 113
112 return prev; 114 *uval = prev;
115 return ret;
113} 116}
114 117
115#endif /* __KERNEL__ */ 118#endif /* __KERNEL__ */
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 1570c0b54336..a83bbea62c67 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -13,44 +13,13 @@
13#ifdef __KERNEL__ 13#ifdef __KERNEL__
14 14
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/list.h>
17#include <linux/spinlock.h>
18 16
19struct rwsem_waiter;
20
21extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
22extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
23extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
24extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
25
26/*
27 * the semaphore definition
28 */
29struct rw_semaphore {
30 long count;
31#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L 17#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
32#define RWSEM_ACTIVE_BIAS 0x0000000000000001L 18#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
33#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL 19#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
34#define RWSEM_WAITING_BIAS (-0x0000000100000000L) 20#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
35#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 21#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
36#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 22#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
37 spinlock_t wait_lock;
38 struct list_head wait_list;
39};
40
41#define __RWSEM_INITIALIZER(name) \
42 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
43 LIST_HEAD_INIT((name).wait_list) }
44
45#define DECLARE_RWSEM(name) \
46 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
47
48static inline void init_rwsem(struct rw_semaphore *sem)
49{
50 sem->count = RWSEM_UNLOCKED_VALUE;
51 spin_lock_init(&sem->wait_lock);
52 INIT_LIST_HEAD(&sem->wait_list);
53}
54 23
55static inline void __down_read(struct rw_semaphore *sem) 24static inline void __down_read(struct rw_semaphore *sem)
56{ 25{
@@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
250#endif 219#endif
251} 220}
252 221
253static inline int rwsem_is_locked(struct rw_semaphore *sem)
254{
255 return (sem->count != 0);
256}
257
258#endif /* __KERNEL__ */ 222#endif /* __KERNEL__ */
259#endif /* _ALPHA_RWSEM_H */ 223#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 9ab234f48dd8..a19d60082299 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -44,11 +44,16 @@ static char irq_user_affinity[NR_IRQS];
44 44
45int irq_select_affinity(unsigned int irq) 45int irq_select_affinity(unsigned int irq)
46{ 46{
47 struct irq_desc *desc = irq_to_desc[irq]; 47 struct irq_data *data = irq_get_irq_data(irq);
48 struct irq_chip *chip;
48 static int last_cpu; 49 static int last_cpu;
49 int cpu = last_cpu + 1; 50 int cpu = last_cpu + 1;
50 51
51 if (!desc || !get_irq_desc_chip(desc)->set_affinity || irq_user_affinity[irq]) 52 if (!data)
53 return 1;
54 chip = irq_data_get_irq_chip(data);
55
56 if (!chip->irq_set_affinity || irq_user_affinity[irq])
52 return 1; 57 return 1;
53 58
54 while (!cpu_possible(cpu) || 59 while (!cpu_possible(cpu) ||
@@ -56,8 +61,8 @@ int irq_select_affinity(unsigned int irq)
56 cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); 61 cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
57 last_cpu = cpu; 62 last_cpu = cpu;
58 63
59 cpumask_copy(desc->affinity, cpumask_of(cpu)); 64 cpumask_copy(data->affinity, cpumask_of(cpu));
60 get_irq_desc_chip(desc)->set_affinity(irq, cpumask_of(cpu)); 65 chip->irq_set_affinity(data, cpumask_of(cpu), false);
61 return 0; 66 return 0;
62} 67}
63#endif /* CONFIG_SMP */ 68#endif /* CONFIG_SMP */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 2d0679b60939..411ca11d0a18 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -228,14 +228,9 @@ struct irqaction timer_irqaction = {
228void __init 228void __init
229init_rtc_irq(void) 229init_rtc_irq(void)
230{ 230{
231 struct irq_desc *desc = irq_to_desc(RTC_IRQ); 231 set_irq_chip_and_handler_name(RTC_IRQ, &no_irq_chip,
232 232 handle_simple_irq, "RTC");
233 if (desc) { 233 setup_irq(RTC_IRQ, &timer_irqaction);
234 desc->status |= IRQ_DISABLED;
235 set_irq_chip_and_handler_name(RTC_IRQ, &no_irq_chip,
236 handle_simple_irq, "RTC");
237 setup_irq(RTC_IRQ, &timer_irqaction);
238 }
239} 234}
240 235
241/* Dummy irqactions. */ 236/* Dummy irqactions. */
diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c
index 956ea0ed1694..c7cc9813e45f 100644
--- a/arch/alpha/kernel/irq_i8259.c
+++ b/arch/alpha/kernel/irq_i8259.c
@@ -33,10 +33,10 @@ i8259_update_irq_hw(unsigned int irq, unsigned long mask)
33} 33}
34 34
35inline void 35inline void
36i8259a_enable_irq(unsigned int irq) 36i8259a_enable_irq(struct irq_data *d)
37{ 37{
38 spin_lock(&i8259_irq_lock); 38 spin_lock(&i8259_irq_lock);
39 i8259_update_irq_hw(irq, cached_irq_mask &= ~(1 << irq)); 39 i8259_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq));
40 spin_unlock(&i8259_irq_lock); 40 spin_unlock(&i8259_irq_lock);
41} 41}
42 42
@@ -47,16 +47,18 @@ __i8259a_disable_irq(unsigned int irq)
47} 47}
48 48
49void 49void
50i8259a_disable_irq(unsigned int irq) 50i8259a_disable_irq(struct irq_data *d)
51{ 51{
52 spin_lock(&i8259_irq_lock); 52 spin_lock(&i8259_irq_lock);
53 __i8259a_disable_irq(irq); 53 __i8259a_disable_irq(d->irq);
54 spin_unlock(&i8259_irq_lock); 54 spin_unlock(&i8259_irq_lock);
55} 55}
56 56
57void 57void
58i8259a_mask_and_ack_irq(unsigned int irq) 58i8259a_mask_and_ack_irq(struct irq_data *d)
59{ 59{
60 unsigned int irq = d->irq;
61
60 spin_lock(&i8259_irq_lock); 62 spin_lock(&i8259_irq_lock);
61 __i8259a_disable_irq(irq); 63 __i8259a_disable_irq(irq);
62 64
@@ -71,9 +73,9 @@ i8259a_mask_and_ack_irq(unsigned int irq)
71 73
72struct irq_chip i8259a_irq_type = { 74struct irq_chip i8259a_irq_type = {
73 .name = "XT-PIC", 75 .name = "XT-PIC",
74 .unmask = i8259a_enable_irq, 76 .irq_unmask = i8259a_enable_irq,
75 .mask = i8259a_disable_irq, 77 .irq_mask = i8259a_disable_irq,
76 .mask_ack = i8259a_mask_and_ack_irq, 78 .irq_mask_ack = i8259a_mask_and_ack_irq,
77}; 79};
78 80
79void __init 81void __init
diff --git a/arch/alpha/kernel/irq_impl.h b/arch/alpha/kernel/irq_impl.h
index b63ccd7386f1..d507a234b05d 100644
--- a/arch/alpha/kernel/irq_impl.h
+++ b/arch/alpha/kernel/irq_impl.h
@@ -31,11 +31,9 @@ extern void init_rtc_irq(void);
31 31
32extern void common_init_isa_dma(void); 32extern void common_init_isa_dma(void);
33 33
34extern void i8259a_enable_irq(unsigned int); 34extern void i8259a_enable_irq(struct irq_data *d);
35extern void i8259a_disable_irq(unsigned int); 35extern void i8259a_disable_irq(struct irq_data *d);
36extern void i8259a_mask_and_ack_irq(unsigned int); 36extern void i8259a_mask_and_ack_irq(struct irq_data *d);
37extern unsigned int i8259a_startup_irq(unsigned int);
38extern void i8259a_end_irq(unsigned int);
39extern struct irq_chip i8259a_irq_type; 37extern struct irq_chip i8259a_irq_type;
40extern void init_i8259a_irqs(void); 38extern void init_i8259a_irqs(void);
41 39
diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c
index 2863458c853e..b30227fa7f5f 100644
--- a/arch/alpha/kernel/irq_pyxis.c
+++ b/arch/alpha/kernel/irq_pyxis.c
@@ -29,21 +29,21 @@ pyxis_update_irq_hw(unsigned long mask)
29} 29}
30 30
31static inline void 31static inline void
32pyxis_enable_irq(unsigned int irq) 32pyxis_enable_irq(struct irq_data *d)
33{ 33{
34 pyxis_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16)); 34 pyxis_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
35} 35}
36 36
37static void 37static void
38pyxis_disable_irq(unsigned int irq) 38pyxis_disable_irq(struct irq_data *d)
39{ 39{
40 pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); 40 pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
41} 41}
42 42
43static void 43static void
44pyxis_mask_and_ack_irq(unsigned int irq) 44pyxis_mask_and_ack_irq(struct irq_data *d)
45{ 45{
46 unsigned long bit = 1UL << (irq - 16); 46 unsigned long bit = 1UL << (d->irq - 16);
47 unsigned long mask = cached_irq_mask &= ~bit; 47 unsigned long mask = cached_irq_mask &= ~bit;
48 48
49 /* Disable the interrupt. */ 49 /* Disable the interrupt. */
@@ -58,9 +58,9 @@ pyxis_mask_and_ack_irq(unsigned int irq)
58 58
59static struct irq_chip pyxis_irq_type = { 59static struct irq_chip pyxis_irq_type = {
60 .name = "PYXIS", 60 .name = "PYXIS",
61 .mask_ack = pyxis_mask_and_ack_irq, 61 .irq_mask_ack = pyxis_mask_and_ack_irq,
62 .mask = pyxis_disable_irq, 62 .irq_mask = pyxis_disable_irq,
63 .unmask = pyxis_enable_irq, 63 .irq_unmask = pyxis_enable_irq,
64}; 64};
65 65
66void 66void
@@ -103,7 +103,7 @@ init_pyxis_irqs(unsigned long ignore_mask)
103 if ((ignore_mask >> i) & 1) 103 if ((ignore_mask >> i) & 1)
104 continue; 104 continue;
105 set_irq_chip_and_handler(i, &pyxis_irq_type, handle_level_irq); 105 set_irq_chip_and_handler(i, &pyxis_irq_type, handle_level_irq);
106 irq_to_desc(i)->status |= IRQ_LEVEL; 106 irq_set_status_flags(i, IRQ_LEVEL);
107 } 107 }
108 108
109 setup_irq(16+7, &isa_cascade_irqaction); 109 setup_irq(16+7, &isa_cascade_irqaction);
diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c
index 0e57e828b413..82a47bba41c4 100644
--- a/arch/alpha/kernel/irq_srm.c
+++ b/arch/alpha/kernel/irq_srm.c
@@ -18,27 +18,27 @@
18DEFINE_SPINLOCK(srm_irq_lock); 18DEFINE_SPINLOCK(srm_irq_lock);
19 19
20static inline void 20static inline void
21srm_enable_irq(unsigned int irq) 21srm_enable_irq(struct irq_data *d)
22{ 22{
23 spin_lock(&srm_irq_lock); 23 spin_lock(&srm_irq_lock);
24 cserve_ena(irq - 16); 24 cserve_ena(d->irq - 16);
25 spin_unlock(&srm_irq_lock); 25 spin_unlock(&srm_irq_lock);
26} 26}
27 27
28static void 28static void
29srm_disable_irq(unsigned int irq) 29srm_disable_irq(struct irq_data *d)
30{ 30{
31 spin_lock(&srm_irq_lock); 31 spin_lock(&srm_irq_lock);
32 cserve_dis(irq - 16); 32 cserve_dis(d->irq - 16);
33 spin_unlock(&srm_irq_lock); 33 spin_unlock(&srm_irq_lock);
34} 34}
35 35
36/* Handle interrupts from the SRM, assuming no additional weirdness. */ 36/* Handle interrupts from the SRM, assuming no additional weirdness. */
37static struct irq_chip srm_irq_type = { 37static struct irq_chip srm_irq_type = {
38 .name = "SRM", 38 .name = "SRM",
39 .unmask = srm_enable_irq, 39 .irq_unmask = srm_enable_irq,
40 .mask = srm_disable_irq, 40 .irq_mask = srm_disable_irq,
41 .mask_ack = srm_disable_irq, 41 .irq_mask_ack = srm_disable_irq,
42}; 42};
43 43
44void __init 44void __init
@@ -52,7 +52,7 @@ init_srm_irqs(long max, unsigned long ignore_mask)
52 if (i < 64 && ((ignore_mask >> i) & 1)) 52 if (i < 64 && ((ignore_mask >> i) & 1))
53 continue; 53 continue;
54 set_irq_chip_and_handler(i, &srm_irq_type, handle_level_irq); 54 set_irq_chip_and_handler(i, &srm_irq_type, handle_level_irq);
55 irq_to_desc(i)->status |= IRQ_LEVEL; 55 irq_set_status_flags(i, IRQ_LEVEL);
56 } 56 }
57} 57}
58 58
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index fe698b5045e9..376f22130791 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -230,44 +230,24 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st
230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0; 230 return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0;
231} 231}
232 232
233static int 233SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
234do_osf_statfs(struct path *path, struct osf_statfs __user *buffer, 234 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
235 unsigned long bufsiz)
236{ 235{
237 struct kstatfs linux_stat; 236 struct kstatfs linux_stat;
238 int error = vfs_statfs(path, &linux_stat); 237 int error = user_statfs(pathname, &linux_stat);
239 if (!error) 238 if (!error)
240 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz); 239 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
241 return error; 240 return error;
242} 241}
243 242
244SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
245 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
246{
247 struct path path;
248 int retval;
249
250 retval = user_path(pathname, &path);
251 if (!retval) {
252 retval = do_osf_statfs(&path, buffer, bufsiz);
253 path_put(&path);
254 }
255 return retval;
256}
257
258SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd, 243SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd,
259 struct osf_statfs __user *, buffer, unsigned long, bufsiz) 244 struct osf_statfs __user *, buffer, unsigned long, bufsiz)
260{ 245{
261 struct file *file; 246 struct kstatfs linux_stat;
262 int retval; 247 int error = fd_statfs(fd, &linux_stat);
263 248 if (!error)
264 retval = -EBADF; 249 error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
265 file = fget(fd); 250 return error;
266 if (file) {
267 retval = do_osf_statfs(&file->f_path, buffer, bufsiz);
268 fput(file);
269 }
270 return retval;
271} 251}
272 252
273/* 253/*
diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c
index 7bef61768236..88d95e872f55 100644
--- a/arch/alpha/kernel/sys_alcor.c
+++ b/arch/alpha/kernel/sys_alcor.c
@@ -44,31 +44,31 @@ alcor_update_irq_hw(unsigned long mask)
44} 44}
45 45
46static inline void 46static inline void
47alcor_enable_irq(unsigned int irq) 47alcor_enable_irq(struct irq_data *d)
48{ 48{
49 alcor_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16)); 49 alcor_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
50} 50}
51 51
52static void 52static void
53alcor_disable_irq(unsigned int irq) 53alcor_disable_irq(struct irq_data *d)
54{ 54{
55 alcor_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); 55 alcor_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
56} 56}
57 57
58static void 58static void
59alcor_mask_and_ack_irq(unsigned int irq) 59alcor_mask_and_ack_irq(struct irq_data *d)
60{ 60{
61 alcor_disable_irq(irq); 61 alcor_disable_irq(d);
62 62
63 /* On ALCOR/XLT, need to dismiss interrupt via GRU. */ 63 /* On ALCOR/XLT, need to dismiss interrupt via GRU. */
64 *(vuip)GRU_INT_CLEAR = 1 << (irq - 16); mb(); 64 *(vuip)GRU_INT_CLEAR = 1 << (d->irq - 16); mb();
65 *(vuip)GRU_INT_CLEAR = 0; mb(); 65 *(vuip)GRU_INT_CLEAR = 0; mb();
66} 66}
67 67
68static void 68static void
69alcor_isa_mask_and_ack_irq(unsigned int irq) 69alcor_isa_mask_and_ack_irq(struct irq_data *d)
70{ 70{
71 i8259a_mask_and_ack_irq(irq); 71 i8259a_mask_and_ack_irq(d);
72 72
73 /* On ALCOR/XLT, need to dismiss interrupt via GRU. */ 73 /* On ALCOR/XLT, need to dismiss interrupt via GRU. */
74 *(vuip)GRU_INT_CLEAR = 0x80000000; mb(); 74 *(vuip)GRU_INT_CLEAR = 0x80000000; mb();
@@ -77,9 +77,9 @@ alcor_isa_mask_and_ack_irq(unsigned int irq)
77 77
78static struct irq_chip alcor_irq_type = { 78static struct irq_chip alcor_irq_type = {
79 .name = "ALCOR", 79 .name = "ALCOR",
80 .unmask = alcor_enable_irq, 80 .irq_unmask = alcor_enable_irq,
81 .mask = alcor_disable_irq, 81 .irq_mask = alcor_disable_irq,
82 .mask_ack = alcor_mask_and_ack_irq, 82 .irq_mask_ack = alcor_mask_and_ack_irq,
83}; 83};
84 84
85static void 85static void
@@ -126,9 +126,9 @@ alcor_init_irq(void)
126 if (i >= 16+20 && i <= 16+30) 126 if (i >= 16+20 && i <= 16+30)
127 continue; 127 continue;
128 set_irq_chip_and_handler(i, &alcor_irq_type, handle_level_irq); 128 set_irq_chip_and_handler(i, &alcor_irq_type, handle_level_irq);
129 irq_to_desc(i)->status |= IRQ_LEVEL; 129 irq_set_status_flags(i, IRQ_LEVEL);
130 } 130 }
131 i8259a_irq_type.ack = alcor_isa_mask_and_ack_irq; 131 i8259a_irq_type.irq_ack = alcor_isa_mask_and_ack_irq;
132 132
133 init_i8259a_irqs(); 133 init_i8259a_irqs();
134 common_init_isa_dma(); 134 common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c
index b0c916493aea..57eb6307bc27 100644
--- a/arch/alpha/kernel/sys_cabriolet.c
+++ b/arch/alpha/kernel/sys_cabriolet.c
@@ -46,22 +46,22 @@ cabriolet_update_irq_hw(unsigned int irq, unsigned long mask)
46} 46}
47 47
48static inline void 48static inline void
49cabriolet_enable_irq(unsigned int irq) 49cabriolet_enable_irq(struct irq_data *d)
50{ 50{
51 cabriolet_update_irq_hw(irq, cached_irq_mask &= ~(1UL << irq)); 51 cabriolet_update_irq_hw(d->irq, cached_irq_mask &= ~(1UL << d->irq));
52} 52}
53 53
54static void 54static void
55cabriolet_disable_irq(unsigned int irq) 55cabriolet_disable_irq(struct irq_data *d)
56{ 56{
57 cabriolet_update_irq_hw(irq, cached_irq_mask |= 1UL << irq); 57 cabriolet_update_irq_hw(d->irq, cached_irq_mask |= 1UL << d->irq);
58} 58}
59 59
60static struct irq_chip cabriolet_irq_type = { 60static struct irq_chip cabriolet_irq_type = {
61 .name = "CABRIOLET", 61 .name = "CABRIOLET",
62 .unmask = cabriolet_enable_irq, 62 .irq_unmask = cabriolet_enable_irq,
63 .mask = cabriolet_disable_irq, 63 .irq_mask = cabriolet_disable_irq,
64 .mask_ack = cabriolet_disable_irq, 64 .irq_mask_ack = cabriolet_disable_irq,
65}; 65};
66 66
67static void 67static void
@@ -107,7 +107,7 @@ common_init_irq(void (*srm_dev_int)(unsigned long v))
107 for (i = 16; i < 35; ++i) { 107 for (i = 16; i < 35; ++i) {
108 set_irq_chip_and_handler(i, &cabriolet_irq_type, 108 set_irq_chip_and_handler(i, &cabriolet_irq_type,
109 handle_level_irq); 109 handle_level_irq);
110 irq_to_desc(i)->status |= IRQ_LEVEL; 110 irq_set_status_flags(i, IRQ_LEVEL);
111 } 111 }
112 } 112 }
113 113
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index edad5f759ccd..481df4ecb651 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -98,37 +98,37 @@ tsunami_update_irq_hw(unsigned long mask)
98} 98}
99 99
100static void 100static void
101dp264_enable_irq(unsigned int irq) 101dp264_enable_irq(struct irq_data *d)
102{ 102{
103 spin_lock(&dp264_irq_lock); 103 spin_lock(&dp264_irq_lock);
104 cached_irq_mask |= 1UL << irq; 104 cached_irq_mask |= 1UL << d->irq;
105 tsunami_update_irq_hw(cached_irq_mask); 105 tsunami_update_irq_hw(cached_irq_mask);
106 spin_unlock(&dp264_irq_lock); 106 spin_unlock(&dp264_irq_lock);
107} 107}
108 108
109static void 109static void
110dp264_disable_irq(unsigned int irq) 110dp264_disable_irq(struct irq_data *d)
111{ 111{
112 spin_lock(&dp264_irq_lock); 112 spin_lock(&dp264_irq_lock);
113 cached_irq_mask &= ~(1UL << irq); 113 cached_irq_mask &= ~(1UL << d->irq);
114 tsunami_update_irq_hw(cached_irq_mask); 114 tsunami_update_irq_hw(cached_irq_mask);
115 spin_unlock(&dp264_irq_lock); 115 spin_unlock(&dp264_irq_lock);
116} 116}
117 117
118static void 118static void
119clipper_enable_irq(unsigned int irq) 119clipper_enable_irq(struct irq_data *d)
120{ 120{
121 spin_lock(&dp264_irq_lock); 121 spin_lock(&dp264_irq_lock);
122 cached_irq_mask |= 1UL << (irq - 16); 122 cached_irq_mask |= 1UL << (d->irq - 16);
123 tsunami_update_irq_hw(cached_irq_mask); 123 tsunami_update_irq_hw(cached_irq_mask);
124 spin_unlock(&dp264_irq_lock); 124 spin_unlock(&dp264_irq_lock);
125} 125}
126 126
127static void 127static void
128clipper_disable_irq(unsigned int irq) 128clipper_disable_irq(struct irq_data *d)
129{ 129{
130 spin_lock(&dp264_irq_lock); 130 spin_lock(&dp264_irq_lock);
131 cached_irq_mask &= ~(1UL << (irq - 16)); 131 cached_irq_mask &= ~(1UL << (d->irq - 16));
132 tsunami_update_irq_hw(cached_irq_mask); 132 tsunami_update_irq_hw(cached_irq_mask);
133 spin_unlock(&dp264_irq_lock); 133 spin_unlock(&dp264_irq_lock);
134} 134}
@@ -149,10 +149,11 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
149} 149}
150 150
151static int 151static int
152dp264_set_affinity(unsigned int irq, const struct cpumask *affinity) 152dp264_set_affinity(struct irq_data *d, const struct cpumask *affinity,
153{ 153 bool force)
154{
154 spin_lock(&dp264_irq_lock); 155 spin_lock(&dp264_irq_lock);
155 cpu_set_irq_affinity(irq, *affinity); 156 cpu_set_irq_affinity(d->irq, *affinity);
156 tsunami_update_irq_hw(cached_irq_mask); 157 tsunami_update_irq_hw(cached_irq_mask);
157 spin_unlock(&dp264_irq_lock); 158 spin_unlock(&dp264_irq_lock);
158 159
@@ -160,10 +161,11 @@ dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
160} 161}
161 162
162static int 163static int
163clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) 164clipper_set_affinity(struct irq_data *d, const struct cpumask *affinity,
164{ 165 bool force)
166{
165 spin_lock(&dp264_irq_lock); 167 spin_lock(&dp264_irq_lock);
166 cpu_set_irq_affinity(irq - 16, *affinity); 168 cpu_set_irq_affinity(d->irq - 16, *affinity);
167 tsunami_update_irq_hw(cached_irq_mask); 169 tsunami_update_irq_hw(cached_irq_mask);
168 spin_unlock(&dp264_irq_lock); 170 spin_unlock(&dp264_irq_lock);
169 171
@@ -171,19 +173,19 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
171} 173}
172 174
173static struct irq_chip dp264_irq_type = { 175static struct irq_chip dp264_irq_type = {
174 .name = "DP264", 176 .name = "DP264",
175 .unmask = dp264_enable_irq, 177 .irq_unmask = dp264_enable_irq,
176 .mask = dp264_disable_irq, 178 .irq_mask = dp264_disable_irq,
177 .mask_ack = dp264_disable_irq, 179 .irq_mask_ack = dp264_disable_irq,
178 .set_affinity = dp264_set_affinity, 180 .irq_set_affinity = dp264_set_affinity,
179}; 181};
180 182
181static struct irq_chip clipper_irq_type = { 183static struct irq_chip clipper_irq_type = {
182 .name = "CLIPPER", 184 .name = "CLIPPER",
183 .unmask = clipper_enable_irq, 185 .irq_unmask = clipper_enable_irq,
184 .mask = clipper_disable_irq, 186 .irq_mask = clipper_disable_irq,
185 .mask_ack = clipper_disable_irq, 187 .irq_mask_ack = clipper_disable_irq,
186 .set_affinity = clipper_set_affinity, 188 .irq_set_affinity = clipper_set_affinity,
187}; 189};
188 190
189static void 191static void
@@ -268,8 +270,8 @@ init_tsunami_irqs(struct irq_chip * ops, int imin, int imax)
268{ 270{
269 long i; 271 long i;
270 for (i = imin; i <= imax; ++i) { 272 for (i = imin; i <= imax; ++i) {
271 irq_to_desc(i)->status |= IRQ_LEVEL;
272 set_irq_chip_and_handler(i, ops, handle_level_irq); 273 set_irq_chip_and_handler(i, ops, handle_level_irq);
274 irq_set_status_flags(i, IRQ_LEVEL);
273 } 275 }
274} 276}
275 277
diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c
index ae5f29d127b0..402e908ffb3e 100644
--- a/arch/alpha/kernel/sys_eb64p.c
+++ b/arch/alpha/kernel/sys_eb64p.c
@@ -44,22 +44,22 @@ eb64p_update_irq_hw(unsigned int irq, unsigned long mask)
44} 44}
45 45
46static inline void 46static inline void
47eb64p_enable_irq(unsigned int irq) 47eb64p_enable_irq(struct irq_data *d)
48{ 48{
49 eb64p_update_irq_hw(irq, cached_irq_mask &= ~(1 << irq)); 49 eb64p_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq));
50} 50}
51 51
52static void 52static void
53eb64p_disable_irq(unsigned int irq) 53eb64p_disable_irq(struct irq_data *d)
54{ 54{
55 eb64p_update_irq_hw(irq, cached_irq_mask |= 1 << irq); 55 eb64p_update_irq_hw(d->irq, cached_irq_mask |= 1 << d->irq);
56} 56}
57 57
58static struct irq_chip eb64p_irq_type = { 58static struct irq_chip eb64p_irq_type = {
59 .name = "EB64P", 59 .name = "EB64P",
60 .unmask = eb64p_enable_irq, 60 .irq_unmask = eb64p_enable_irq,
61 .mask = eb64p_disable_irq, 61 .irq_mask = eb64p_disable_irq,
62 .mask_ack = eb64p_disable_irq, 62 .irq_mask_ack = eb64p_disable_irq,
63}; 63};
64 64
65static void 65static void
@@ -118,9 +118,9 @@ eb64p_init_irq(void)
118 init_i8259a_irqs(); 118 init_i8259a_irqs();
119 119
120 for (i = 16; i < 32; ++i) { 120 for (i = 16; i < 32; ++i) {
121 irq_to_desc(i)->status |= IRQ_LEVEL;
122 set_irq_chip_and_handler(i, &eb64p_irq_type, handle_level_irq); 121 set_irq_chip_and_handler(i, &eb64p_irq_type, handle_level_irq);
123 } 122 irq_set_status_flags(i, IRQ_LEVEL);
123 }
124 124
125 common_init_isa_dma(); 125 common_init_isa_dma();
126 setup_irq(16+5, &isa_cascade_irqaction); 126 setup_irq(16+5, &isa_cascade_irqaction);
diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c
index 1121bc5c6c6c..0b44a54c1522 100644
--- a/arch/alpha/kernel/sys_eiger.c
+++ b/arch/alpha/kernel/sys_eiger.c
@@ -51,16 +51,18 @@ eiger_update_irq_hw(unsigned long irq, unsigned long mask)
51} 51}
52 52
53static inline void 53static inline void
54eiger_enable_irq(unsigned int irq) 54eiger_enable_irq(struct irq_data *d)
55{ 55{
56 unsigned int irq = d->irq;
56 unsigned long mask; 57 unsigned long mask;
57 mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63))); 58 mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63)));
58 eiger_update_irq_hw(irq, mask); 59 eiger_update_irq_hw(irq, mask);
59} 60}
60 61
61static void 62static void
62eiger_disable_irq(unsigned int irq) 63eiger_disable_irq(struct irq_data *d)
63{ 64{
65 unsigned int irq = d->irq;
64 unsigned long mask; 66 unsigned long mask;
65 mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63)); 67 mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63));
66 eiger_update_irq_hw(irq, mask); 68 eiger_update_irq_hw(irq, mask);
@@ -68,9 +70,9 @@ eiger_disable_irq(unsigned int irq)
68 70
69static struct irq_chip eiger_irq_type = { 71static struct irq_chip eiger_irq_type = {
70 .name = "EIGER", 72 .name = "EIGER",
71 .unmask = eiger_enable_irq, 73 .irq_unmask = eiger_enable_irq,
72 .mask = eiger_disable_irq, 74 .irq_mask = eiger_disable_irq,
73 .mask_ack = eiger_disable_irq, 75 .irq_mask_ack = eiger_disable_irq,
74}; 76};
75 77
76static void 78static void
@@ -136,8 +138,8 @@ eiger_init_irq(void)
136 init_i8259a_irqs(); 138 init_i8259a_irqs();
137 139
138 for (i = 16; i < 128; ++i) { 140 for (i = 16; i < 128; ++i) {
139 irq_to_desc(i)->status |= IRQ_LEVEL;
140 set_irq_chip_and_handler(i, &eiger_irq_type, handle_level_irq); 141 set_irq_chip_and_handler(i, &eiger_irq_type, handle_level_irq);
142 irq_set_status_flags(i, IRQ_LEVEL);
141 } 143 }
142} 144}
143 145
diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c
index 34f55e03d331..00341b75c8b2 100644
--- a/arch/alpha/kernel/sys_jensen.c
+++ b/arch/alpha/kernel/sys_jensen.c
@@ -63,34 +63,34 @@
63 */ 63 */
64 64
65static void 65static void
66jensen_local_enable(unsigned int irq) 66jensen_local_enable(struct irq_data *d)
67{ 67{
68 /* the parport is really hw IRQ 1, silly Jensen. */ 68 /* the parport is really hw IRQ 1, silly Jensen. */
69 if (irq == 7) 69 if (d->irq == 7)
70 i8259a_enable_irq(1); 70 i8259a_enable_irq(d);
71} 71}
72 72
73static void 73static void
74jensen_local_disable(unsigned int irq) 74jensen_local_disable(struct irq_data *d)
75{ 75{
76 /* the parport is really hw IRQ 1, silly Jensen. */ 76 /* the parport is really hw IRQ 1, silly Jensen. */
77 if (irq == 7) 77 if (d->irq == 7)
78 i8259a_disable_irq(1); 78 i8259a_disable_irq(d);
79} 79}
80 80
81static void 81static void
82jensen_local_mask_ack(unsigned int irq) 82jensen_local_mask_ack(struct irq_data *d)
83{ 83{
84 /* the parport is really hw IRQ 1, silly Jensen. */ 84 /* the parport is really hw IRQ 1, silly Jensen. */
85 if (irq == 7) 85 if (d->irq == 7)
86 i8259a_mask_and_ack_irq(1); 86 i8259a_mask_and_ack_irq(d);
87} 87}
88 88
89static struct irq_chip jensen_local_irq_type = { 89static struct irq_chip jensen_local_irq_type = {
90 .name = "LOCAL", 90 .name = "LOCAL",
91 .unmask = jensen_local_enable, 91 .irq_unmask = jensen_local_enable,
92 .mask = jensen_local_disable, 92 .irq_mask = jensen_local_disable,
93 .mask_ack = jensen_local_mask_ack, 93 .irq_mask_ack = jensen_local_mask_ack,
94}; 94};
95 95
96static void 96static void
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 2bfc9f1b1ddc..e61910734e41 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -104,9 +104,10 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7)
104} 104}
105 105
106static void 106static void
107io7_enable_irq(unsigned int irq) 107io7_enable_irq(struct irq_data *d)
108{ 108{
109 volatile unsigned long *ctl; 109 volatile unsigned long *ctl;
110 unsigned int irq = d->irq;
110 struct io7 *io7; 111 struct io7 *io7;
111 112
112 ctl = io7_get_irq_ctl(irq, &io7); 113 ctl = io7_get_irq_ctl(irq, &io7);
@@ -115,7 +116,7 @@ io7_enable_irq(unsigned int irq)
115 __func__, irq); 116 __func__, irq);
116 return; 117 return;
117 } 118 }
118 119
119 spin_lock(&io7->irq_lock); 120 spin_lock(&io7->irq_lock);
120 *ctl |= 1UL << 24; 121 *ctl |= 1UL << 24;
121 mb(); 122 mb();
@@ -124,9 +125,10 @@ io7_enable_irq(unsigned int irq)
124} 125}
125 126
126static void 127static void
127io7_disable_irq(unsigned int irq) 128io7_disable_irq(struct irq_data *d)
128{ 129{
129 volatile unsigned long *ctl; 130 volatile unsigned long *ctl;
131 unsigned int irq = d->irq;
130 struct io7 *io7; 132 struct io7 *io7;
131 133
132 ctl = io7_get_irq_ctl(irq, &io7); 134 ctl = io7_get_irq_ctl(irq, &io7);
@@ -135,7 +137,7 @@ io7_disable_irq(unsigned int irq)
135 __func__, irq); 137 __func__, irq);
136 return; 138 return;
137 } 139 }
138 140
139 spin_lock(&io7->irq_lock); 141 spin_lock(&io7->irq_lock);
140 *ctl &= ~(1UL << 24); 142 *ctl &= ~(1UL << 24);
141 mb(); 143 mb();
@@ -144,35 +146,29 @@ io7_disable_irq(unsigned int irq)
144} 146}
145 147
146static void 148static void
147marvel_irq_noop(unsigned int irq) 149marvel_irq_noop(struct irq_data *d)
148{ 150{
149 return; 151 return;
150}
151
152static unsigned int
153marvel_irq_noop_return(unsigned int irq)
154{
155 return 0;
156} 152}
157 153
158static struct irq_chip marvel_legacy_irq_type = { 154static struct irq_chip marvel_legacy_irq_type = {
159 .name = "LEGACY", 155 .name = "LEGACY",
160 .mask = marvel_irq_noop, 156 .irq_mask = marvel_irq_noop,
161 .unmask = marvel_irq_noop, 157 .irq_unmask = marvel_irq_noop,
162}; 158};
163 159
164static struct irq_chip io7_lsi_irq_type = { 160static struct irq_chip io7_lsi_irq_type = {
165 .name = "LSI", 161 .name = "LSI",
166 .unmask = io7_enable_irq, 162 .irq_unmask = io7_enable_irq,
167 .mask = io7_disable_irq, 163 .irq_mask = io7_disable_irq,
168 .mask_ack = io7_disable_irq, 164 .irq_mask_ack = io7_disable_irq,
169}; 165};
170 166
171static struct irq_chip io7_msi_irq_type = { 167static struct irq_chip io7_msi_irq_type = {
172 .name = "MSI", 168 .name = "MSI",
173 .unmask = io7_enable_irq, 169 .irq_unmask = io7_enable_irq,
174 .mask = io7_disable_irq, 170 .irq_mask = io7_disable_irq,
175 .ack = marvel_irq_noop, 171 .irq_ack = marvel_irq_noop,
176}; 172};
177 173
178static void 174static void
@@ -280,8 +276,8 @@ init_io7_irqs(struct io7 *io7,
280 276
281 /* Set up the lsi irqs. */ 277 /* Set up the lsi irqs. */
282 for (i = 0; i < 128; ++i) { 278 for (i = 0; i < 128; ++i) {
283 irq_to_desc(base + i)->status |= IRQ_LEVEL;
284 set_irq_chip_and_handler(base + i, lsi_ops, handle_level_irq); 279 set_irq_chip_and_handler(base + i, lsi_ops, handle_level_irq);
280 irq_set_status_flags(i, IRQ_LEVEL);
285 } 281 }
286 282
287 /* Disable the implemented irqs in hardware. */ 283 /* Disable the implemented irqs in hardware. */
@@ -294,8 +290,8 @@ init_io7_irqs(struct io7 *io7,
294 290
295 /* Set up the msi irqs. */ 291 /* Set up the msi irqs. */
296 for (i = 128; i < (128 + 512); ++i) { 292 for (i = 128; i < (128 + 512); ++i) {
297 irq_to_desc(base + i)->status |= IRQ_LEVEL;
298 set_irq_chip_and_handler(base + i, msi_ops, handle_level_irq); 293 set_irq_chip_and_handler(base + i, msi_ops, handle_level_irq);
294 irq_set_status_flags(i, IRQ_LEVEL);
299 } 295 }
300 296
301 for (i = 0; i < 16; ++i) 297 for (i = 0; i < 16; ++i)
diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c
index bcc1639e8efb..cf7f43dd3147 100644
--- a/arch/alpha/kernel/sys_mikasa.c
+++ b/arch/alpha/kernel/sys_mikasa.c
@@ -43,22 +43,22 @@ mikasa_update_irq_hw(int mask)
43} 43}
44 44
45static inline void 45static inline void
46mikasa_enable_irq(unsigned int irq) 46mikasa_enable_irq(struct irq_data *d)
47{ 47{
48 mikasa_update_irq_hw(cached_irq_mask |= 1 << (irq - 16)); 48 mikasa_update_irq_hw(cached_irq_mask |= 1 << (d->irq - 16));
49} 49}
50 50
51static void 51static void
52mikasa_disable_irq(unsigned int irq) 52mikasa_disable_irq(struct irq_data *d)
53{ 53{
54 mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (irq - 16))); 54 mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (d->irq - 16)));
55} 55}
56 56
57static struct irq_chip mikasa_irq_type = { 57static struct irq_chip mikasa_irq_type = {
58 .name = "MIKASA", 58 .name = "MIKASA",
59 .unmask = mikasa_enable_irq, 59 .irq_unmask = mikasa_enable_irq,
60 .mask = mikasa_disable_irq, 60 .irq_mask = mikasa_disable_irq,
61 .mask_ack = mikasa_disable_irq, 61 .irq_mask_ack = mikasa_disable_irq,
62}; 62};
63 63
64static void 64static void
@@ -98,8 +98,8 @@ mikasa_init_irq(void)
98 mikasa_update_irq_hw(0); 98 mikasa_update_irq_hw(0);
99 99
100 for (i = 16; i < 32; ++i) { 100 for (i = 16; i < 32; ++i) {
101 irq_to_desc(i)->status |= IRQ_LEVEL;
102 set_irq_chip_and_handler(i, &mikasa_irq_type, handle_level_irq); 101 set_irq_chip_and_handler(i, &mikasa_irq_type, handle_level_irq);
102 irq_set_status_flags(i, IRQ_LEVEL);
103 } 103 }
104 104
105 init_i8259a_irqs(); 105 init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c
index e88f4ae1260e..92bc188e94a9 100644
--- a/arch/alpha/kernel/sys_noritake.c
+++ b/arch/alpha/kernel/sys_noritake.c
@@ -48,22 +48,22 @@ noritake_update_irq_hw(int irq, int mask)
48} 48}
49 49
50static void 50static void
51noritake_enable_irq(unsigned int irq) 51noritake_enable_irq(struct irq_data *d)
52{ 52{
53 noritake_update_irq_hw(irq, cached_irq_mask |= 1 << (irq - 16)); 53 noritake_update_irq_hw(d->irq, cached_irq_mask |= 1 << (d->irq - 16));
54} 54}
55 55
56static void 56static void
57noritake_disable_irq(unsigned int irq) 57noritake_disable_irq(struct irq_data *d)
58{ 58{
59 noritake_update_irq_hw(irq, cached_irq_mask &= ~(1 << (irq - 16))); 59 noritake_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << (d->irq - 16)));
60} 60}
61 61
62static struct irq_chip noritake_irq_type = { 62static struct irq_chip noritake_irq_type = {
63 .name = "NORITAKE", 63 .name = "NORITAKE",
64 .unmask = noritake_enable_irq, 64 .irq_unmask = noritake_enable_irq,
65 .mask = noritake_disable_irq, 65 .irq_mask = noritake_disable_irq,
66 .mask_ack = noritake_disable_irq, 66 .irq_mask_ack = noritake_disable_irq,
67}; 67};
68 68
69static void 69static void
@@ -127,8 +127,8 @@ noritake_init_irq(void)
127 outw(0, 0x54c); 127 outw(0, 0x54c);
128 128
129 for (i = 16; i < 48; ++i) { 129 for (i = 16; i < 48; ++i) {
130 irq_to_desc(i)->status |= IRQ_LEVEL;
131 set_irq_chip_and_handler(i, &noritake_irq_type, handle_level_irq); 130 set_irq_chip_and_handler(i, &noritake_irq_type, handle_level_irq);
131 irq_set_status_flags(i, IRQ_LEVEL);
132 } 132 }
133 133
134 init_i8259a_irqs(); 134 init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c
index 6a51364dd1cc..936d4140ed5f 100644
--- a/arch/alpha/kernel/sys_rawhide.c
+++ b/arch/alpha/kernel/sys_rawhide.c
@@ -56,9 +56,10 @@ rawhide_update_irq_hw(int hose, int mask)
56 (((h) < MCPCIA_MAX_HOSES) && (cached_irq_masks[(h)] != 0)) 56 (((h) < MCPCIA_MAX_HOSES) && (cached_irq_masks[(h)] != 0))
57 57
58static inline void 58static inline void
59rawhide_enable_irq(unsigned int irq) 59rawhide_enable_irq(struct irq_data *d)
60{ 60{
61 unsigned int mask, hose; 61 unsigned int mask, hose;
62 unsigned int irq = d->irq;
62 63
63 irq -= 16; 64 irq -= 16;
64 hose = irq / 24; 65 hose = irq / 24;
@@ -76,9 +77,10 @@ rawhide_enable_irq(unsigned int irq)
76} 77}
77 78
78static void 79static void
79rawhide_disable_irq(unsigned int irq) 80rawhide_disable_irq(struct irq_data *d)
80{ 81{
81 unsigned int mask, hose; 82 unsigned int mask, hose;
83 unsigned int irq = d->irq;
82 84
83 irq -= 16; 85 irq -= 16;
84 hose = irq / 24; 86 hose = irq / 24;
@@ -96,9 +98,10 @@ rawhide_disable_irq(unsigned int irq)
96} 98}
97 99
98static void 100static void
99rawhide_mask_and_ack_irq(unsigned int irq) 101rawhide_mask_and_ack_irq(struct irq_data *d)
100{ 102{
101 unsigned int mask, mask1, hose; 103 unsigned int mask, mask1, hose;
104 unsigned int irq = d->irq;
102 105
103 irq -= 16; 106 irq -= 16;
104 hose = irq / 24; 107 hose = irq / 24;
@@ -123,9 +126,9 @@ rawhide_mask_and_ack_irq(unsigned int irq)
123 126
124static struct irq_chip rawhide_irq_type = { 127static struct irq_chip rawhide_irq_type = {
125 .name = "RAWHIDE", 128 .name = "RAWHIDE",
126 .unmask = rawhide_enable_irq, 129 .irq_unmask = rawhide_enable_irq,
127 .mask = rawhide_disable_irq, 130 .irq_mask = rawhide_disable_irq,
128 .mask_ack = rawhide_mask_and_ack_irq, 131 .irq_mask_ack = rawhide_mask_and_ack_irq,
129}; 132};
130 133
131static void 134static void
@@ -177,8 +180,8 @@ rawhide_init_irq(void)
177 } 180 }
178 181
179 for (i = 16; i < 128; ++i) { 182 for (i = 16; i < 128; ++i) {
180 irq_to_desc(i)->status |= IRQ_LEVEL;
181 set_irq_chip_and_handler(i, &rawhide_irq_type, handle_level_irq); 183 set_irq_chip_and_handler(i, &rawhide_irq_type, handle_level_irq);
184 irq_set_status_flags(i, IRQ_LEVEL);
182 } 185 }
183 186
184 init_i8259a_irqs(); 187 init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c
index 89e7e37ec84c..cea22a62913b 100644
--- a/arch/alpha/kernel/sys_rx164.c
+++ b/arch/alpha/kernel/sys_rx164.c
@@ -47,22 +47,22 @@ rx164_update_irq_hw(unsigned long mask)
47} 47}
48 48
49static inline void 49static inline void
50rx164_enable_irq(unsigned int irq) 50rx164_enable_irq(struct irq_data *d)
51{ 51{
52 rx164_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16)); 52 rx164_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
53} 53}
54 54
55static void 55static void
56rx164_disable_irq(unsigned int irq) 56rx164_disable_irq(struct irq_data *d)
57{ 57{
58 rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16))); 58 rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
59} 59}
60 60
61static struct irq_chip rx164_irq_type = { 61static struct irq_chip rx164_irq_type = {
62 .name = "RX164", 62 .name = "RX164",
63 .unmask = rx164_enable_irq, 63 .irq_unmask = rx164_enable_irq,
64 .mask = rx164_disable_irq, 64 .irq_mask = rx164_disable_irq,
65 .mask_ack = rx164_disable_irq, 65 .irq_mask_ack = rx164_disable_irq,
66}; 66};
67 67
68static void 68static void
@@ -99,8 +99,8 @@ rx164_init_irq(void)
99 99
100 rx164_update_irq_hw(0); 100 rx164_update_irq_hw(0);
101 for (i = 16; i < 40; ++i) { 101 for (i = 16; i < 40; ++i) {
102 irq_to_desc(i)->status |= IRQ_LEVEL;
103 set_irq_chip_and_handler(i, &rx164_irq_type, handle_level_irq); 102 set_irq_chip_and_handler(i, &rx164_irq_type, handle_level_irq);
103 irq_set_status_flags(i, IRQ_LEVEL);
104 } 104 }
105 105
106 init_i8259a_irqs(); 106 init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 5c4423d1b06c..a349538aabc9 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -443,11 +443,11 @@ lynx_swizzle(struct pci_dev *dev, u8 *pinp)
443/* GENERIC irq routines */ 443/* GENERIC irq routines */
444 444
445static inline void 445static inline void
446sable_lynx_enable_irq(unsigned int irq) 446sable_lynx_enable_irq(struct irq_data *d)
447{ 447{
448 unsigned long bit, mask; 448 unsigned long bit, mask;
449 449
450 bit = sable_lynx_irq_swizzle->irq_to_mask[irq]; 450 bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
451 spin_lock(&sable_lynx_irq_lock); 451 spin_lock(&sable_lynx_irq_lock);
452 mask = sable_lynx_irq_swizzle->shadow_mask &= ~(1UL << bit); 452 mask = sable_lynx_irq_swizzle->shadow_mask &= ~(1UL << bit);
453 sable_lynx_irq_swizzle->update_irq_hw(bit, mask); 453 sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -459,11 +459,11 @@ sable_lynx_enable_irq(unsigned int irq)
459} 459}
460 460
461static void 461static void
462sable_lynx_disable_irq(unsigned int irq) 462sable_lynx_disable_irq(struct irq_data *d)
463{ 463{
464 unsigned long bit, mask; 464 unsigned long bit, mask;
465 465
466 bit = sable_lynx_irq_swizzle->irq_to_mask[irq]; 466 bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
467 spin_lock(&sable_lynx_irq_lock); 467 spin_lock(&sable_lynx_irq_lock);
468 mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit; 468 mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit;
469 sable_lynx_irq_swizzle->update_irq_hw(bit, mask); 469 sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -475,11 +475,11 @@ sable_lynx_disable_irq(unsigned int irq)
475} 475}
476 476
477static void 477static void
478sable_lynx_mask_and_ack_irq(unsigned int irq) 478sable_lynx_mask_and_ack_irq(struct irq_data *d)
479{ 479{
480 unsigned long bit, mask; 480 unsigned long bit, mask;
481 481
482 bit = sable_lynx_irq_swizzle->irq_to_mask[irq]; 482 bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
483 spin_lock(&sable_lynx_irq_lock); 483 spin_lock(&sable_lynx_irq_lock);
484 mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit; 484 mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit;
485 sable_lynx_irq_swizzle->update_irq_hw(bit, mask); 485 sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -489,9 +489,9 @@ sable_lynx_mask_and_ack_irq(unsigned int irq)
489 489
490static struct irq_chip sable_lynx_irq_type = { 490static struct irq_chip sable_lynx_irq_type = {
491 .name = "SABLE/LYNX", 491 .name = "SABLE/LYNX",
492 .unmask = sable_lynx_enable_irq, 492 .irq_unmask = sable_lynx_enable_irq,
493 .mask = sable_lynx_disable_irq, 493 .irq_mask = sable_lynx_disable_irq,
494 .mask_ack = sable_lynx_mask_and_ack_irq, 494 .irq_mask_ack = sable_lynx_mask_and_ack_irq,
495}; 495};
496 496
497static void 497static void
@@ -518,9 +518,9 @@ sable_lynx_init_irq(int nr_of_irqs)
518 long i; 518 long i;
519 519
520 for (i = 0; i < nr_of_irqs; ++i) { 520 for (i = 0; i < nr_of_irqs; ++i) {
521 irq_to_desc(i)->status |= IRQ_LEVEL;
522 set_irq_chip_and_handler(i, &sable_lynx_irq_type, 521 set_irq_chip_and_handler(i, &sable_lynx_irq_type,
523 handle_level_irq); 522 handle_level_irq);
523 irq_set_status_flags(i, IRQ_LEVEL);
524 } 524 }
525 525
526 common_init_isa_dma(); 526 common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c
index f8a1e8a862fb..42a5331f13c4 100644
--- a/arch/alpha/kernel/sys_takara.c
+++ b/arch/alpha/kernel/sys_takara.c
@@ -45,16 +45,18 @@ takara_update_irq_hw(unsigned long irq, unsigned long mask)
45} 45}
46 46
47static inline void 47static inline void
48takara_enable_irq(unsigned int irq) 48takara_enable_irq(struct irq_data *d)
49{ 49{
50 unsigned int irq = d->irq;
50 unsigned long mask; 51 unsigned long mask;
51 mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63))); 52 mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63)));
52 takara_update_irq_hw(irq, mask); 53 takara_update_irq_hw(irq, mask);
53} 54}
54 55
55static void 56static void
56takara_disable_irq(unsigned int irq) 57takara_disable_irq(struct irq_data *d)
57{ 58{
59 unsigned int irq = d->irq;
58 unsigned long mask; 60 unsigned long mask;
59 mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63)); 61 mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63));
60 takara_update_irq_hw(irq, mask); 62 takara_update_irq_hw(irq, mask);
@@ -62,9 +64,9 @@ takara_disable_irq(unsigned int irq)
62 64
63static struct irq_chip takara_irq_type = { 65static struct irq_chip takara_irq_type = {
64 .name = "TAKARA", 66 .name = "TAKARA",
65 .unmask = takara_enable_irq, 67 .irq_unmask = takara_enable_irq,
66 .mask = takara_disable_irq, 68 .irq_mask = takara_disable_irq,
67 .mask_ack = takara_disable_irq, 69 .irq_mask_ack = takara_disable_irq,
68}; 70};
69 71
70static void 72static void
@@ -136,8 +138,8 @@ takara_init_irq(void)
136 takara_update_irq_hw(i, -1); 138 takara_update_irq_hw(i, -1);
137 139
138 for (i = 16; i < 128; ++i) { 140 for (i = 16; i < 128; ++i) {
139 irq_to_desc(i)->status |= IRQ_LEVEL;
140 set_irq_chip_and_handler(i, &takara_irq_type, handle_level_irq); 141 set_irq_chip_and_handler(i, &takara_irq_type, handle_level_irq);
142 irq_set_status_flags(i, IRQ_LEVEL);
141 } 143 }
142 144
143 common_init_isa_dma(); 145 common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index e02494bf5ef3..8c13a0c77830 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -112,8 +112,9 @@ titan_update_irq_hw(unsigned long mask)
112} 112}
113 113
114static inline void 114static inline void
115titan_enable_irq(unsigned int irq) 115titan_enable_irq(struct irq_data *d)
116{ 116{
117 unsigned int irq = d->irq;
117 spin_lock(&titan_irq_lock); 118 spin_lock(&titan_irq_lock);
118 titan_cached_irq_mask |= 1UL << (irq - 16); 119 titan_cached_irq_mask |= 1UL << (irq - 16);
119 titan_update_irq_hw(titan_cached_irq_mask); 120 titan_update_irq_hw(titan_cached_irq_mask);
@@ -121,8 +122,9 @@ titan_enable_irq(unsigned int irq)
121} 122}
122 123
123static inline void 124static inline void
124titan_disable_irq(unsigned int irq) 125titan_disable_irq(struct irq_data *d)
125{ 126{
127 unsigned int irq = d->irq;
126 spin_lock(&titan_irq_lock); 128 spin_lock(&titan_irq_lock);
127 titan_cached_irq_mask &= ~(1UL << (irq - 16)); 129 titan_cached_irq_mask &= ~(1UL << (irq - 16));
128 titan_update_irq_hw(titan_cached_irq_mask); 130 titan_update_irq_hw(titan_cached_irq_mask);
@@ -144,8 +146,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
144} 146}
145 147
146static int 148static int
147titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) 149titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
150 bool force)
148{ 151{
152 unsigned int irq = d->irq;
149 spin_lock(&titan_irq_lock); 153 spin_lock(&titan_irq_lock);
150 titan_cpu_set_irq_affinity(irq - 16, *affinity); 154 titan_cpu_set_irq_affinity(irq - 16, *affinity);
151 titan_update_irq_hw(titan_cached_irq_mask); 155 titan_update_irq_hw(titan_cached_irq_mask);
@@ -175,17 +179,17 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax)
175{ 179{
176 long i; 180 long i;
177 for (i = imin; i <= imax; ++i) { 181 for (i = imin; i <= imax; ++i) {
178 irq_to_desc(i)->status |= IRQ_LEVEL;
179 set_irq_chip_and_handler(i, ops, handle_level_irq); 182 set_irq_chip_and_handler(i, ops, handle_level_irq);
183 irq_set_status_flags(i, IRQ_LEVEL);
180 } 184 }
181} 185}
182 186
183static struct irq_chip titan_irq_type = { 187static struct irq_chip titan_irq_type = {
184 .name = "TITAN", 188 .name = "TITAN",
185 .unmask = titan_enable_irq, 189 .irq_unmask = titan_enable_irq,
186 .mask = titan_disable_irq, 190 .irq_mask = titan_disable_irq,
187 .mask_ack = titan_disable_irq, 191 .irq_mask_ack = titan_disable_irq,
188 .set_affinity = titan_set_irq_affinity, 192 .irq_set_affinity = titan_set_irq_affinity,
189}; 193};
190 194
191static irqreturn_t 195static irqreturn_t
diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c
index eec52594d410..ca60a387ef0a 100644
--- a/arch/alpha/kernel/sys_wildfire.c
+++ b/arch/alpha/kernel/sys_wildfire.c
@@ -104,10 +104,12 @@ wildfire_init_irq_hw(void)
104} 104}
105 105
106static void 106static void
107wildfire_enable_irq(unsigned int irq) 107wildfire_enable_irq(struct irq_data *d)
108{ 108{
109 unsigned int irq = d->irq;
110
109 if (irq < 16) 111 if (irq < 16)
110 i8259a_enable_irq(irq); 112 i8259a_enable_irq(d);
111 113
112 spin_lock(&wildfire_irq_lock); 114 spin_lock(&wildfire_irq_lock);
113 set_bit(irq, &cached_irq_mask); 115 set_bit(irq, &cached_irq_mask);
@@ -116,10 +118,12 @@ wildfire_enable_irq(unsigned int irq)
116} 118}
117 119
118static void 120static void
119wildfire_disable_irq(unsigned int irq) 121wildfire_disable_irq(struct irq_data *d)
120{ 122{
123 unsigned int irq = d->irq;
124
121 if (irq < 16) 125 if (irq < 16)
122 i8259a_disable_irq(irq); 126 i8259a_disable_irq(d);
123 127
124 spin_lock(&wildfire_irq_lock); 128 spin_lock(&wildfire_irq_lock);
125 clear_bit(irq, &cached_irq_mask); 129 clear_bit(irq, &cached_irq_mask);
@@ -128,10 +132,12 @@ wildfire_disable_irq(unsigned int irq)
128} 132}
129 133
130static void 134static void
131wildfire_mask_and_ack_irq(unsigned int irq) 135wildfire_mask_and_ack_irq(struct irq_data *d)
132{ 136{
137 unsigned int irq = d->irq;
138
133 if (irq < 16) 139 if (irq < 16)
134 i8259a_mask_and_ack_irq(irq); 140 i8259a_mask_and_ack_irq(d);
135 141
136 spin_lock(&wildfire_irq_lock); 142 spin_lock(&wildfire_irq_lock);
137 clear_bit(irq, &cached_irq_mask); 143 clear_bit(irq, &cached_irq_mask);
@@ -141,9 +147,9 @@ wildfire_mask_and_ack_irq(unsigned int irq)
141 147
142static struct irq_chip wildfire_irq_type = { 148static struct irq_chip wildfire_irq_type = {
143 .name = "WILDFIRE", 149 .name = "WILDFIRE",
144 .unmask = wildfire_enable_irq, 150 .irq_unmask = wildfire_enable_irq,
145 .mask = wildfire_disable_irq, 151 .irq_mask = wildfire_disable_irq,
146 .mask_ack = wildfire_mask_and_ack_irq, 152 .irq_mask_ack = wildfire_mask_and_ack_irq,
147}; 153};
148 154
149static void __init 155static void __init
@@ -177,21 +183,21 @@ wildfire_init_irq_per_pca(int qbbno, int pcano)
177 for (i = 0; i < 16; ++i) { 183 for (i = 0; i < 16; ++i) {
178 if (i == 2) 184 if (i == 2)
179 continue; 185 continue;
180 irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL;
181 set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type, 186 set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type,
182 handle_level_irq); 187 handle_level_irq);
188 irq_set_status_flags(i + irq_bias, IRQ_LEVEL);
183 } 189 }
184 190
185 irq_to_desc(36+irq_bias)->status |= IRQ_LEVEL;
186 set_irq_chip_and_handler(36+irq_bias, &wildfire_irq_type, 191 set_irq_chip_and_handler(36+irq_bias, &wildfire_irq_type,
187 handle_level_irq); 192 handle_level_irq);
193 irq_set_status_flags(36 + irq_bias, IRQ_LEVEL);
188 for (i = 40; i < 64; ++i) { 194 for (i = 40; i < 64; ++i) {
189 irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL;
190 set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type, 195 set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type,
191 handle_level_irq); 196 handle_level_irq);
197 irq_set_status_flags(i + irq_bias, IRQ_LEVEL);
192 } 198 }
193 199
194 setup_irq(32+irq_bias, &isa_enable); 200 setup_irq(32+irq_bias, &isa_enable);
195} 201}
196 202
197static void __init 203static void __init
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index c1f3e7cb82a4..a58e84f1a63b 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -159,7 +159,7 @@ void read_persistent_clock(struct timespec *ts)
159 159
160/* 160/*
161 * timer_interrupt() needs to keep up the real-time clock, 161 * timer_interrupt() needs to keep up the real-time clock,
162 * as well as call the "do_timer()" routine every clocktick 162 * as well as call the "xtime_update()" routine every clocktick
163 */ 163 */
164irqreturn_t timer_interrupt(int irq, void *dev) 164irqreturn_t timer_interrupt(int irq, void *dev)
165{ 165{
@@ -172,8 +172,6 @@ irqreturn_t timer_interrupt(int irq, void *dev)
172 profile_tick(CPU_PROFILING); 172 profile_tick(CPU_PROFILING);
173#endif 173#endif
174 174
175 write_seqlock(&xtime_lock);
176
177 /* 175 /*
178 * Calculate how many ticks have passed since the last update, 176 * Calculate how many ticks have passed since the last update,
179 * including any previous partial leftover. Save any resulting 177 * including any previous partial leftover. Save any resulting
@@ -187,9 +185,7 @@ irqreturn_t timer_interrupt(int irq, void *dev)
187 nticks = delta >> FIX_SHIFT; 185 nticks = delta >> FIX_SHIFT;
188 186
189 if (nticks) 187 if (nticks)
190 do_timer(nticks); 188 xtime_update(nticks);
191
192 write_sequnlock(&xtime_lock);
193 189
194 if (test_irq_work_pending()) { 190 if (test_irq_work_pending()) {
195 clear_irq_work_pending(); 191 clear_irq_work_pending();
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 778655f0257a..ea5ee4d067f3 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -6,6 +6,8 @@ config ARM_VIC
6 6
7config ARM_VIC_NR 7config ARM_VIC_NR
8 int 8 int
9 default 4 if ARCH_S5PV210
10 default 3 if ARCH_S5P6442 || ARCH_S5PC100
9 default 2 11 default 2
10 depends on ARM_VIC 12 depends on ARM_VIC
11 help 13 help
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index b33fe7065b38..199a6b6de7f4 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -35,7 +35,7 @@
35 : "cc", "memory") 35 : "cc", "memory")
36 36
37static inline int 37static inline int
38futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 38futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
39{ 39{
40 int op = (encoded_op >> 28) & 7; 40 int op = (encoded_op >> 28) & 7;
41 int cmp = (encoded_op >> 24) & 15; 41 int cmp = (encoded_op >> 24) & 15;
@@ -46,7 +46,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
46 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 46 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
47 oparg = 1 << oparg; 47 oparg = 1 << oparg;
48 48
49 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 49 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
50 return -EFAULT; 50 return -EFAULT;
51 51
52 pagefault_disable(); /* implies preempt_disable() */ 52 pagefault_disable(); /* implies preempt_disable() */
@@ -88,36 +88,35 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
88} 88}
89 89
90static inline int 90static inline int
91futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 91futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
92 u32 oldval, u32 newval)
92{ 93{
93 int val; 94 int ret = 0;
95 u32 val;
94 96
95 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 97 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
96 return -EFAULT; 98 return -EFAULT;
97 99
98 pagefault_disable(); /* implies preempt_disable() */
99
100 __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" 100 __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
101 "1: " T(ldr) " %0, [%3]\n" 101 "1: " T(ldr) " %1, [%4]\n"
102 " teq %0, %1\n" 102 " teq %1, %2\n"
103 " it eq @ explicit IT needed for the 2b label\n" 103 " it eq @ explicit IT needed for the 2b label\n"
104 "2: " T(streq) " %2, [%3]\n" 104 "2: " T(streq) " %3, [%4]\n"
105 "3:\n" 105 "3:\n"
106 " .pushsection __ex_table,\"a\"\n" 106 " .pushsection __ex_table,\"a\"\n"
107 " .align 3\n" 107 " .align 3\n"
108 " .long 1b, 4f, 2b, 4f\n" 108 " .long 1b, 4f, 2b, 4f\n"
109 " .popsection\n" 109 " .popsection\n"
110 " .pushsection .fixup,\"ax\"\n" 110 " .pushsection .fixup,\"ax\"\n"
111 "4: mov %0, %4\n" 111 "4: mov %0, %5\n"
112 " b 3b\n" 112 " b 3b\n"
113 " .popsection" 113 " .popsection"
114 : "=&r" (val) 114 : "+r" (ret), "=&r" (val)
115 : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) 115 : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT)
116 : "cc", "memory"); 116 : "cc", "memory");
117 117
118 pagefault_enable(); /* subsumes preempt_enable() */ 118 *uval = val;
119 119 return ret;
120 return val;
121} 120}
122 121
123#endif /* !SMP */ 122#endif /* !SMP */
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 3a0893a76a3b..bf13b814c1b8 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -15,10 +15,6 @@ struct meminfo;
15struct sys_timer; 15struct sys_timer;
16 16
17struct machine_desc { 17struct machine_desc {
18 /*
19 * Note! The first two elements are used
20 * by assembler code in head.S, head-common.S
21 */
22 unsigned int nr; /* architecture number */ 18 unsigned int nr; /* architecture number */
23 const char *name; /* architecture name */ 19 const char *name; /* architecture name */
24 unsigned long boot_params; /* tagged list */ 20 unsigned long boot_params; /* tagged list */
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 9763be04f77e..22de005f159c 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
10#ifndef _ASMARM_PGALLOC_H 10#ifndef _ASMARM_PGALLOC_H
11#define _ASMARM_PGALLOC_H 11#define _ASMARM_PGALLOC_H
12 12
13#include <linux/pagemap.h>
14
13#include <asm/domain.h> 15#include <asm/domain.h>
14#include <asm/pgtable-hwdef.h> 16#include <asm/pgtable-hwdef.h>
15#include <asm/processor.h> 17#include <asm/processor.h>
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index d600bd350704..44b84fe6e1b0 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -836,9 +836,11 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
836/* 836/*
837 * One-time initialisation. 837 * One-time initialisation.
838 */ 838 */
839static void reset_ctrl_regs(void *unused) 839static void reset_ctrl_regs(void *info)
840{ 840{
841 int i; 841 int i, cpu = smp_processor_id();
842 u32 dbg_power;
843 cpumask_t *cpumask = info;
842 844
843 /* 845 /*
844 * v7 debug contains save and restore registers so that debug state 846 * v7 debug contains save and restore registers so that debug state
@@ -850,6 +852,17 @@ static void reset_ctrl_regs(void *unused)
850 */ 852 */
851 if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) { 853 if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
852 /* 854 /*
855 * Ensure sticky power-down is clear (i.e. debug logic is
856 * powered up).
857 */
858 asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
859 if ((dbg_power & 0x1) == 0) {
860 pr_warning("CPU %d debug is powered down!\n", cpu);
861 cpumask_or(cpumask, cpumask, cpumask_of(cpu));
862 return;
863 }
864
865 /*
853 * Unconditionally clear the lock by writing a value 866 * Unconditionally clear the lock by writing a value
854 * other than 0xC5ACCE55 to the access register. 867 * other than 0xC5ACCE55 to the access register.
855 */ 868 */
@@ -887,6 +900,7 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = {
887static int __init arch_hw_breakpoint_init(void) 900static int __init arch_hw_breakpoint_init(void)
888{ 901{
889 u32 dscr; 902 u32 dscr;
903 cpumask_t cpumask = { CPU_BITS_NONE };
890 904
891 debug_arch = get_debug_arch(); 905 debug_arch = get_debug_arch();
892 906
@@ -911,7 +925,13 @@ static int __init arch_hw_breakpoint_init(void)
911 * Reset the breakpoint resources. We assume that a halting 925 * Reset the breakpoint resources. We assume that a halting
912 * debugger will leave the world in a nice state for us. 926 * debugger will leave the world in a nice state for us.
913 */ 927 */
914 on_each_cpu(reset_ctrl_regs, NULL, 1); 928 on_each_cpu(reset_ctrl_regs, &cpumask, 1);
929 if (!cpumask_empty(&cpumask)) {
930 core_num_brps = 0;
931 core_num_reserved_brps = 0;
932 core_num_wrps = 0;
933 return 0;
934 }
915 935
916 ARM_DBG_READ(c1, 0, dscr); 936 ARM_DBG_READ(c1, 0, dscr);
917 if (dscr & ARM_DSCR_HDBGEN) { 937 if (dscr & ARM_DSCR_HDBGEN) {
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 19c6816db61e..b13e70f63d71 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -996,10 +996,10 @@ static int ptrace_gethbpregs(struct task_struct *tsk, long num,
996 while (!(arch_ctrl.len & 0x1)) 996 while (!(arch_ctrl.len & 0x1))
997 arch_ctrl.len >>= 1; 997 arch_ctrl.len >>= 1;
998 998
999 if (idx & 0x1) 999 if (num & 0x1)
1000 reg = encode_ctrl_reg(arch_ctrl);
1001 else
1002 reg = bp->attr.bp_addr; 1000 reg = bp->attr.bp_addr;
1001 else
1002 reg = encode_ctrl_reg(arch_ctrl);
1003 } 1003 }
1004 1004
1005put: 1005put:
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 3d76bf233734..1ff46cabc7ef 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -107,9 +107,7 @@ void timer_tick(void)
107{ 107{
108 profile_tick(CPU_PROFILING); 108 profile_tick(CPU_PROFILING);
109 do_leds(); 109 do_leds();
110 write_seqlock(&xtime_lock); 110 xtime_update(1);
111 do_timer(1);
112 write_sequnlock(&xtime_lock);
113#ifndef CONFIG_SMP 111#ifndef CONFIG_SMP
114 update_process_times(user_mode(get_irq_regs())); 112 update_process_times(user_mode(get_irq_regs()));
115#endif 113#endif
diff --git a/arch/arm/mach-clps711x/include/mach/time.h b/arch/arm/mach-clps711x/include/mach/time.h
index 8fe283ccd1f3..61fef9129c6a 100644
--- a/arch/arm/mach-clps711x/include/mach/time.h
+++ b/arch/arm/mach-clps711x/include/mach/time.h
@@ -30,7 +30,7 @@ p720t_timer_interrupt(int irq, void *dev_id)
30{ 30{
31 struct pt_regs *regs = get_irq_regs(); 31 struct pt_regs *regs = get_irq_regs();
32 do_leds(); 32 do_leds();
33 do_timer(1); 33 xtime_update(1);
34#ifndef CONFIG_SMP 34#ifndef CONFIG_SMP
35 update_process_times(user_mode(regs)); 35 update_process_times(user_mode(regs));
36#endif 36#endif
diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c
index 343de73161fa..4a68c2b1ec11 100644
--- a/arch/arm/mach-davinci/cpufreq.c
+++ b/arch/arm/mach-davinci/cpufreq.c
@@ -132,7 +132,7 @@ out:
132 return ret; 132 return ret;
133} 133}
134 134
135static int __init davinci_cpu_init(struct cpufreq_policy *policy) 135static int davinci_cpu_init(struct cpufreq_policy *policy)
136{ 136{
137 int result = 0; 137 int result = 0;
138 struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data; 138 struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data;
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 9eec63070e0c..beda8a4133a0 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -480,8 +480,15 @@ static struct platform_device da850_mcasp_device = {
480 .resource = da850_mcasp_resources, 480 .resource = da850_mcasp_resources,
481}; 481};
482 482
483struct platform_device davinci_pcm_device = {
484 .name = "davinci-pcm-audio",
485 .id = -1,
486};
487
483void __init da8xx_register_mcasp(int id, struct snd_platform_data *pdata) 488void __init da8xx_register_mcasp(int id, struct snd_platform_data *pdata)
484{ 489{
490 platform_device_register(&davinci_pcm_device);
491
485 /* DA830/OMAP-L137 has 3 instances of McASP */ 492 /* DA830/OMAP-L137 has 3 instances of McASP */
486 if (cpu_is_davinci_da830() && id == 1) { 493 if (cpu_is_davinci_da830() && id == 1) {
487 da830_mcasp1_device.dev.platform_data = pdata; 494 da830_mcasp1_device.dev.platform_data = pdata;
diff --git a/arch/arm/mach-davinci/gpio-tnetv107x.c b/arch/arm/mach-davinci/gpio-tnetv107x.c
index d10298620e2c..3fa3e2867e19 100644
--- a/arch/arm/mach-davinci/gpio-tnetv107x.c
+++ b/arch/arm/mach-davinci/gpio-tnetv107x.c
@@ -58,7 +58,7 @@ static int tnetv107x_gpio_request(struct gpio_chip *chip, unsigned offset)
58 58
59 spin_lock_irqsave(&ctlr->lock, flags); 59 spin_lock_irqsave(&ctlr->lock, flags);
60 60
61 gpio_reg_set_bit(&regs->enable, gpio); 61 gpio_reg_set_bit(regs->enable, gpio);
62 62
63 spin_unlock_irqrestore(&ctlr->lock, flags); 63 spin_unlock_irqrestore(&ctlr->lock, flags);
64 64
@@ -74,7 +74,7 @@ static void tnetv107x_gpio_free(struct gpio_chip *chip, unsigned offset)
74 74
75 spin_lock_irqsave(&ctlr->lock, flags); 75 spin_lock_irqsave(&ctlr->lock, flags);
76 76
77 gpio_reg_clear_bit(&regs->enable, gpio); 77 gpio_reg_clear_bit(regs->enable, gpio);
78 78
79 spin_unlock_irqrestore(&ctlr->lock, flags); 79 spin_unlock_irqrestore(&ctlr->lock, flags);
80} 80}
@@ -88,7 +88,7 @@ static int tnetv107x_gpio_dir_in(struct gpio_chip *chip, unsigned offset)
88 88
89 spin_lock_irqsave(&ctlr->lock, flags); 89 spin_lock_irqsave(&ctlr->lock, flags);
90 90
91 gpio_reg_set_bit(&regs->direction, gpio); 91 gpio_reg_set_bit(regs->direction, gpio);
92 92
93 spin_unlock_irqrestore(&ctlr->lock, flags); 93 spin_unlock_irqrestore(&ctlr->lock, flags);
94 94
@@ -106,11 +106,11 @@ static int tnetv107x_gpio_dir_out(struct gpio_chip *chip,
106 spin_lock_irqsave(&ctlr->lock, flags); 106 spin_lock_irqsave(&ctlr->lock, flags);
107 107
108 if (value) 108 if (value)
109 gpio_reg_set_bit(&regs->data_out, gpio); 109 gpio_reg_set_bit(regs->data_out, gpio);
110 else 110 else
111 gpio_reg_clear_bit(&regs->data_out, gpio); 111 gpio_reg_clear_bit(regs->data_out, gpio);
112 112
113 gpio_reg_clear_bit(&regs->direction, gpio); 113 gpio_reg_clear_bit(regs->direction, gpio);
114 114
115 spin_unlock_irqrestore(&ctlr->lock, flags); 115 spin_unlock_irqrestore(&ctlr->lock, flags);
116 116
@@ -124,7 +124,7 @@ static int tnetv107x_gpio_get(struct gpio_chip *chip, unsigned offset)
124 unsigned gpio = chip->base + offset; 124 unsigned gpio = chip->base + offset;
125 int ret; 125 int ret;
126 126
127 ret = gpio_reg_get_bit(&regs->data_in, gpio); 127 ret = gpio_reg_get_bit(regs->data_in, gpio);
128 128
129 return ret ? 1 : 0; 129 return ret ? 1 : 0;
130} 130}
@@ -140,9 +140,9 @@ static void tnetv107x_gpio_set(struct gpio_chip *chip,
140 spin_lock_irqsave(&ctlr->lock, flags); 140 spin_lock_irqsave(&ctlr->lock, flags);
141 141
142 if (value) 142 if (value)
143 gpio_reg_set_bit(&regs->data_out, gpio); 143 gpio_reg_set_bit(regs->data_out, gpio);
144 else 144 else
145 gpio_reg_clear_bit(&regs->data_out, gpio); 145 gpio_reg_clear_bit(regs->data_out, gpio);
146 146
147 spin_unlock_irqrestore(&ctlr->lock, flags); 147 spin_unlock_irqrestore(&ctlr->lock, flags);
148} 148}
diff --git a/arch/arm/mach-davinci/include/mach/clkdev.h b/arch/arm/mach-davinci/include/mach/clkdev.h
index 730c49d1ebd8..14a504887189 100644
--- a/arch/arm/mach-davinci/include/mach/clkdev.h
+++ b/arch/arm/mach-davinci/include/mach/clkdev.h
@@ -1,6 +1,8 @@
1#ifndef __MACH_CLKDEV_H 1#ifndef __MACH_CLKDEV_H
2#define __MACH_CLKDEV_H 2#define __MACH_CLKDEV_H
3 3
4struct clk;
5
4static inline int __clk_get(struct clk *clk) 6static inline int __clk_get(struct clk *clk)
5{ 7{
6 return 1; 8 return 1;
diff --git a/arch/arm/mach-omap2/clkt_dpll.c b/arch/arm/mach-omap2/clkt_dpll.c
index 337392c3f549..acb7ae5b0a25 100644
--- a/arch/arm/mach-omap2/clkt_dpll.c
+++ b/arch/arm/mach-omap2/clkt_dpll.c
@@ -77,7 +77,7 @@ static int _dpll_test_fint(struct clk *clk, u8 n)
77 dd = clk->dpll_data; 77 dd = clk->dpll_data;
78 78
79 /* DPLL divider must result in a valid jitter correction val */ 79 /* DPLL divider must result in a valid jitter correction val */
80 fint = clk->parent->rate / (n + 1); 80 fint = clk->parent->rate / n;
81 if (fint < DPLL_FINT_BAND1_MIN) { 81 if (fint < DPLL_FINT_BAND1_MIN) {
82 82
83 pr_debug("rejecting n=%d due to Fint failure, " 83 pr_debug("rejecting n=%d due to Fint failure, "
diff --git a/arch/arm/mach-omap2/mailbox.c b/arch/arm/mach-omap2/mailbox.c
index 394413dc7deb..24b88504df0f 100644
--- a/arch/arm/mach-omap2/mailbox.c
+++ b/arch/arm/mach-omap2/mailbox.c
@@ -193,10 +193,12 @@ static void omap2_mbox_disable_irq(struct omap_mbox *mbox,
193 omap_mbox_type_t irq) 193 omap_mbox_type_t irq)
194{ 194{
195 struct omap_mbox2_priv *p = mbox->priv; 195 struct omap_mbox2_priv *p = mbox->priv;
196 u32 l, bit = (irq == IRQ_TX) ? p->notfull_bit : p->newmsg_bit; 196 u32 bit = (irq == IRQ_TX) ? p->notfull_bit : p->newmsg_bit;
197 l = mbox_read_reg(p->irqdisable); 197
198 l &= ~bit; 198 if (!cpu_is_omap44xx())
199 mbox_write_reg(l, p->irqdisable); 199 bit = mbox_read_reg(p->irqdisable) & ~bit;
200
201 mbox_write_reg(bit, p->irqdisable);
200} 202}
201 203
202static void omap2_mbox_ack_irq(struct omap_mbox *mbox, 204static void omap2_mbox_ack_irq(struct omap_mbox *mbox,
@@ -334,7 +336,7 @@ static struct omap_mbox mbox_iva_info = {
334 .priv = &omap2_mbox_iva_priv, 336 .priv = &omap2_mbox_iva_priv,
335}; 337};
336 338
337struct omap_mbox *omap2_mboxes[] = { &mbox_iva_info, &mbox_dsp_info, NULL }; 339struct omap_mbox *omap2_mboxes[] = { &mbox_dsp_info, &mbox_iva_info, NULL };
338#endif 340#endif
339 341
340#if defined(CONFIG_ARCH_OMAP4) 342#if defined(CONFIG_ARCH_OMAP4)
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index 98148b6c36e9..6c84659cf846 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -605,7 +605,7 @@ static void __init omap_mux_dbg_create_entry(
605 list_for_each_entry(e, &partition->muxmodes, node) { 605 list_for_each_entry(e, &partition->muxmodes, node) {
606 struct omap_mux *m = &e->mux; 606 struct omap_mux *m = &e->mux;
607 607
608 (void)debugfs_create_file(m->muxnames[0], S_IWUGO, mux_dbg_dir, 608 (void)debugfs_create_file(m->muxnames[0], S_IWUSR, mux_dbg_dir,
609 m, &omap_mux_dbg_signal_fops); 609 m, &omap_mux_dbg_signal_fops);
610 } 610 }
611} 611}
diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c
index 125f56591fb5..a5a83b358ddd 100644
--- a/arch/arm/mach-omap2/pm-debug.c
+++ b/arch/arm/mach-omap2/pm-debug.c
@@ -637,14 +637,14 @@ static int __init pm_dbg_init(void)
637 637
638 } 638 }
639 639
640 (void) debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUGO, d, 640 (void) debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUSR, d,
641 &enable_off_mode, &pm_dbg_option_fops); 641 &enable_off_mode, &pm_dbg_option_fops);
642 (void) debugfs_create_file("sleep_while_idle", S_IRUGO | S_IWUGO, d, 642 (void) debugfs_create_file("sleep_while_idle", S_IRUGO | S_IWUSR, d,
643 &sleep_while_idle, &pm_dbg_option_fops); 643 &sleep_while_idle, &pm_dbg_option_fops);
644 (void) debugfs_create_file("wakeup_timer_seconds", S_IRUGO | S_IWUGO, d, 644 (void) debugfs_create_file("wakeup_timer_seconds", S_IRUGO | S_IWUSR, d,
645 &wakeup_timer_seconds, &pm_dbg_option_fops); 645 &wakeup_timer_seconds, &pm_dbg_option_fops);
646 (void) debugfs_create_file("wakeup_timer_milliseconds", 646 (void) debugfs_create_file("wakeup_timer_milliseconds",
647 S_IRUGO | S_IWUGO, d, &wakeup_timer_milliseconds, 647 S_IRUGO | S_IWUSR, d, &wakeup_timer_milliseconds,
648 &pm_dbg_option_fops); 648 &pm_dbg_option_fops);
649 pm_dbg_init_done = 1; 649 pm_dbg_init_done = 1;
650 650
diff --git a/arch/arm/mach-omap2/prcm_mpu44xx.h b/arch/arm/mach-omap2/prcm_mpu44xx.h
index 729a644ce852..3300ff6e3cfe 100644
--- a/arch/arm/mach-omap2/prcm_mpu44xx.h
+++ b/arch/arm/mach-omap2/prcm_mpu44xx.h
@@ -38,8 +38,8 @@
38#define OMAP4430_PRCM_MPU_CPU1_INST 0x0800 38#define OMAP4430_PRCM_MPU_CPU1_INST 0x0800
39 39
40/* PRCM_MPU clockdomain register offsets (from instance start) */ 40/* PRCM_MPU clockdomain register offsets (from instance start) */
41#define OMAP4430_PRCM_MPU_CPU0_MPU_CDOFFS 0x0000 41#define OMAP4430_PRCM_MPU_CPU0_MPU_CDOFFS 0x0018
42#define OMAP4430_PRCM_MPU_CPU1_MPU_CDOFFS 0x0000 42#define OMAP4430_PRCM_MPU_CPU1_MPU_CDOFFS 0x0018
43 43
44 44
45/* 45/*
diff --git a/arch/arm/mach-omap2/smartreflex.c b/arch/arm/mach-omap2/smartreflex.c
index c37e823266d3..1a777e34d0c2 100644
--- a/arch/arm/mach-omap2/smartreflex.c
+++ b/arch/arm/mach-omap2/smartreflex.c
@@ -282,6 +282,7 @@ error:
282 dev_err(&sr_info->pdev->dev, "%s: ERROR in registering" 282 dev_err(&sr_info->pdev->dev, "%s: ERROR in registering"
283 "interrupt handler. Smartreflex will" 283 "interrupt handler. Smartreflex will"
284 "not function as desired\n", __func__); 284 "not function as desired\n", __func__);
285 kfree(name);
285 kfree(sr_info); 286 kfree(sr_info);
286 return ret; 287 return ret;
287} 288}
@@ -879,7 +880,7 @@ static int __init omap_sr_probe(struct platform_device *pdev)
879 ret = sr_late_init(sr_info); 880 ret = sr_late_init(sr_info);
880 if (ret) { 881 if (ret) {
881 pr_warning("%s: Error in SR late init\n", __func__); 882 pr_warning("%s: Error in SR late init\n", __func__);
882 return ret; 883 goto err_release_region;
883 } 884 }
884 } 885 }
885 886
@@ -890,17 +891,20 @@ static int __init omap_sr_probe(struct platform_device *pdev)
890 * not try to create rest of the debugfs entries. 891 * not try to create rest of the debugfs entries.
891 */ 892 */
892 vdd_dbg_dir = omap_voltage_get_dbgdir(sr_info->voltdm); 893 vdd_dbg_dir = omap_voltage_get_dbgdir(sr_info->voltdm);
893 if (!vdd_dbg_dir) 894 if (!vdd_dbg_dir) {
894 return -EINVAL; 895 ret = -EINVAL;
896 goto err_release_region;
897 }
895 898
896 dbg_dir = debugfs_create_dir("smartreflex", vdd_dbg_dir); 899 dbg_dir = debugfs_create_dir("smartreflex", vdd_dbg_dir);
897 if (IS_ERR(dbg_dir)) { 900 if (IS_ERR(dbg_dir)) {
898 dev_err(&pdev->dev, "%s: Unable to create debugfs directory\n", 901 dev_err(&pdev->dev, "%s: Unable to create debugfs directory\n",
899 __func__); 902 __func__);
900 return PTR_ERR(dbg_dir); 903 ret = PTR_ERR(dbg_dir);
904 goto err_release_region;
901 } 905 }
902 906
903 (void) debugfs_create_file("autocomp", S_IRUGO | S_IWUGO, dbg_dir, 907 (void) debugfs_create_file("autocomp", S_IRUGO | S_IWUSR, dbg_dir,
904 (void *)sr_info, &pm_sr_fops); 908 (void *)sr_info, &pm_sr_fops);
905 (void) debugfs_create_x32("errweight", S_IRUGO, dbg_dir, 909 (void) debugfs_create_x32("errweight", S_IRUGO, dbg_dir,
906 &sr_info->err_weight); 910 &sr_info->err_weight);
@@ -913,7 +917,8 @@ static int __init omap_sr_probe(struct platform_device *pdev)
913 if (IS_ERR(nvalue_dir)) { 917 if (IS_ERR(nvalue_dir)) {
914 dev_err(&pdev->dev, "%s: Unable to create debugfs directory" 918 dev_err(&pdev->dev, "%s: Unable to create debugfs directory"
915 "for n-values\n", __func__); 919 "for n-values\n", __func__);
916 return PTR_ERR(nvalue_dir); 920 ret = PTR_ERR(nvalue_dir);
921 goto err_release_region;
917 } 922 }
918 923
919 omap_voltage_get_volttable(sr_info->voltdm, &volt_data); 924 omap_voltage_get_volttable(sr_info->voltdm, &volt_data);
@@ -922,24 +927,16 @@ static int __init omap_sr_probe(struct platform_device *pdev)
922 " corresponding vdd vdd_%s. Cannot create debugfs" 927 " corresponding vdd vdd_%s. Cannot create debugfs"
923 "entries for n-values\n", 928 "entries for n-values\n",
924 __func__, sr_info->voltdm->name); 929 __func__, sr_info->voltdm->name);
925 return -ENODATA; 930 ret = -ENODATA;
931 goto err_release_region;
926 } 932 }
927 933
928 for (i = 0; i < sr_info->nvalue_count; i++) { 934 for (i = 0; i < sr_info->nvalue_count; i++) {
929 char *name; 935 char name[NVALUE_NAME_LEN + 1];
930 char volt_name[32];
931
932 name = kzalloc(NVALUE_NAME_LEN + 1, GFP_KERNEL);
933 if (!name) {
934 dev_err(&pdev->dev, "%s: Unable to allocate memory"
935 " for n-value directory name\n", __func__);
936 return -ENOMEM;
937 }
938 936
939 strcpy(name, "volt_"); 937 snprintf(name, sizeof(name), "volt_%d",
940 sprintf(volt_name, "%d", volt_data[i].volt_nominal); 938 volt_data[i].volt_nominal);
941 strcat(name, volt_name); 939 (void) debugfs_create_x32(name, S_IRUGO | S_IWUSR, nvalue_dir,
942 (void) debugfs_create_x32(name, S_IRUGO | S_IWUGO, nvalue_dir,
943 &(sr_info->nvalue_table[i].nvalue)); 940 &(sr_info->nvalue_table[i].nvalue));
944 } 941 }
945 942
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 7b7c2683ae7b..0fc550e7e482 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -39,6 +39,7 @@
39#include <asm/mach/time.h> 39#include <asm/mach/time.h>
40#include <plat/dmtimer.h> 40#include <plat/dmtimer.h>
41#include <asm/localtimer.h> 41#include <asm/localtimer.h>
42#include <asm/sched_clock.h>
42 43
43#include "timer-gp.h" 44#include "timer-gp.h"
44 45
@@ -190,6 +191,7 @@ static void __init omap2_gp_clocksource_init(void)
190/* 191/*
191 * clocksource 192 * clocksource
192 */ 193 */
194static DEFINE_CLOCK_DATA(cd);
193static struct omap_dm_timer *gpt_clocksource; 195static struct omap_dm_timer *gpt_clocksource;
194static cycle_t clocksource_read_cycles(struct clocksource *cs) 196static cycle_t clocksource_read_cycles(struct clocksource *cs)
195{ 197{
@@ -204,6 +206,15 @@ static struct clocksource clocksource_gpt = {
204 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 206 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
205}; 207};
206 208
209static void notrace dmtimer_update_sched_clock(void)
210{
211 u32 cyc;
212
213 cyc = omap_dm_timer_read_counter(gpt_clocksource);
214
215 update_sched_clock(&cd, cyc, (u32)~0);
216}
217
207/* Setup free-running counter for clocksource */ 218/* Setup free-running counter for clocksource */
208static void __init omap2_gp_clocksource_init(void) 219static void __init omap2_gp_clocksource_init(void)
209{ 220{
@@ -224,6 +235,8 @@ static void __init omap2_gp_clocksource_init(void)
224 235
225 omap_dm_timer_set_load_start(gpt, 1, 0); 236 omap_dm_timer_set_load_start(gpt, 1, 0);
226 237
238 init_sched_clock(&cd, dmtimer_update_sched_clock, 32, tick_rate);
239
227 if (clocksource_register_hz(&clocksource_gpt, tick_rate)) 240 if (clocksource_register_hz(&clocksource_gpt, tick_rate))
228 printk(err2, clocksource_gpt.name); 241 printk(err2, clocksource_gpt.name);
229} 242}
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index fbc5b775f895..b166b1d845d7 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -347,6 +347,7 @@ static struct platform_device *pxa25x_devices[] __initdata = {
347 &pxa25x_device_assp, 347 &pxa25x_device_assp,
348 &pxa25x_device_pwm0, 348 &pxa25x_device_pwm0,
349 &pxa25x_device_pwm1, 349 &pxa25x_device_pwm1,
350 &pxa_device_asoc_platform,
350}; 351};
351 352
352static struct sys_device pxa25x_sysdev[] = { 353static struct sys_device pxa25x_sysdev[] = {
diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c
index c31e601eb49c..b9b1e5c2b290 100644
--- a/arch/arm/mach-pxa/tosa-bt.c
+++ b/arch/arm/mach-pxa/tosa-bt.c
@@ -81,8 +81,6 @@ static int tosa_bt_probe(struct platform_device *dev)
81 goto err_rfk_alloc; 81 goto err_rfk_alloc;
82 } 82 }
83 83
84 rfkill_set_led_trigger_name(rfk, "tosa-bt");
85
86 rc = rfkill_register(rfk); 84 rc = rfkill_register(rfk);
87 if (rc) 85 if (rc)
88 goto err_rfkill; 86 goto err_rfkill;
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index af152e70cfcf..f2582ec300d9 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -875,6 +875,11 @@ static struct platform_device sharpsl_rom_device = {
875 .dev.platform_data = &sharpsl_rom_data, 875 .dev.platform_data = &sharpsl_rom_data,
876}; 876};
877 877
878static struct platform_device wm9712_device = {
879 .name = "wm9712-codec",
880 .id = -1,
881};
882
878static struct platform_device *devices[] __initdata = { 883static struct platform_device *devices[] __initdata = {
879 &tosascoop_device, 884 &tosascoop_device,
880 &tosascoop_jc_device, 885 &tosascoop_jc_device,
@@ -885,6 +890,7 @@ static struct platform_device *devices[] __initdata = {
885 &tosaled_device, 890 &tosaled_device,
886 &tosa_bt_device, 891 &tosa_bt_device,
887 &sharpsl_rom_device, 892 &sharpsl_rom_device,
893 &wm9712_device,
888}; 894};
889 895
890static void tosa_poweroff(void) 896static void tosa_poweroff(void)
diff --git a/arch/arm/mach-s3c2440/Kconfig b/arch/arm/mach-s3c2440/Kconfig
index a0cb2581894f..50825a3f91cc 100644
--- a/arch/arm/mach-s3c2440/Kconfig
+++ b/arch/arm/mach-s3c2440/Kconfig
@@ -99,6 +99,7 @@ config MACH_NEO1973_GTA02
99 select POWER_SUPPLY 99 select POWER_SUPPLY
100 select MACH_NEO1973 100 select MACH_NEO1973
101 select S3C2410_PWM 101 select S3C2410_PWM
102 select S3C_DEV_USB_HOST
102 help 103 help
103 Say Y here if you are using the Openmoko GTA02 / Freerunner GSM Phone 104 Say Y here if you are using the Openmoko GTA02 / Freerunner GSM Phone
104 105
diff --git a/arch/arm/mach-s3c2440/include/mach/gta02.h b/arch/arm/mach-s3c2440/include/mach/gta02.h
index 953331d8d56a..3a56a229cac6 100644
--- a/arch/arm/mach-s3c2440/include/mach/gta02.h
+++ b/arch/arm/mach-s3c2440/include/mach/gta02.h
@@ -44,19 +44,19 @@
44#define GTA02v3_GPIO_nUSB_FLT S3C2410_GPG(10) /* v3 + v4 only */ 44#define GTA02v3_GPIO_nUSB_FLT S3C2410_GPG(10) /* v3 + v4 only */
45#define GTA02v3_GPIO_nGSM_OC S3C2410_GPG(11) /* v3 + v4 only */ 45#define GTA02v3_GPIO_nGSM_OC S3C2410_GPG(11) /* v3 + v4 only */
46 46
47#define GTA02_GPIO_AMP_SHUT S3C2440_GPJ1 /* v2 + v3 + v4 only */ 47#define GTA02_GPIO_AMP_SHUT S3C2410_GPJ(1) /* v2 + v3 + v4 only */
48#define GTA02v1_GPIO_WLAN_GPIO10 S3C2440_GPJ2 48#define GTA02v1_GPIO_WLAN_GPIO10 S3C2410_GPJ(2)
49#define GTA02_GPIO_HP_IN S3C2440_GPJ2 /* v2 + v3 + v4 only */ 49#define GTA02_GPIO_HP_IN S3C2410_GPJ(2) /* v2 + v3 + v4 only */
50#define GTA02_GPIO_INT0 S3C2440_GPJ3 /* v2 + v3 + v4 only */ 50#define GTA02_GPIO_INT0 S3C2410_GPJ(3) /* v2 + v3 + v4 only */
51#define GTA02_GPIO_nGSM_EN S3C2440_GPJ4 51#define GTA02_GPIO_nGSM_EN S3C2410_GPJ(4)
52#define GTA02_GPIO_3D_RESET S3C2440_GPJ5 52#define GTA02_GPIO_3D_RESET S3C2410_GPJ(5)
53#define GTA02_GPIO_nDL_GSM S3C2440_GPJ6 /* v4 + v5 only */ 53#define GTA02_GPIO_nDL_GSM S3C2410_GPJ(6) /* v4 + v5 only */
54#define GTA02_GPIO_WLAN_GPIO0 S3C2440_GPJ7 54#define GTA02_GPIO_WLAN_GPIO0 S3C2410_GPJ(7)
55#define GTA02v1_GPIO_BAT_ID S3C2440_GPJ8 55#define GTA02v1_GPIO_BAT_ID S3C2410_GPJ(8)
56#define GTA02_GPIO_KEEPACT S3C2440_GPJ8 56#define GTA02_GPIO_KEEPACT S3C2410_GPJ(8)
57#define GTA02v1_GPIO_HP_IN S3C2440_GPJ10 57#define GTA02v1_GPIO_HP_IN S3C2410_GPJ(10)
58#define GTA02_CHIP_PWD S3C2440_GPJ11 /* v2 + v3 + v4 only */ 58#define GTA02_CHIP_PWD S3C2410_GPJ(11) /* v2 + v3 + v4 only */
59#define GTA02_GPIO_nWLAN_RESET S3C2440_GPJ12 /* v2 + v3 + v4 only */ 59#define GTA02_GPIO_nWLAN_RESET S3C2410_GPJ(12) /* v2 + v3 + v4 only */
60 60
61#define GTA02_IRQ_GSENSOR_1 IRQ_EINT0 61#define GTA02_IRQ_GSENSOR_1 IRQ_EINT0
62#define GTA02_IRQ_MODEM IRQ_EINT1 62#define GTA02_IRQ_MODEM IRQ_EINT1
diff --git a/arch/arm/mach-s3c64xx/clock.c b/arch/arm/mach-s3c64xx/clock.c
index dd3782064508..fdfc4d5e37a1 100644
--- a/arch/arm/mach-s3c64xx/clock.c
+++ b/arch/arm/mach-s3c64xx/clock.c
@@ -151,6 +151,12 @@ static struct clk init_clocks_off[] = {
151 .enable = s3c64xx_pclk_ctrl, 151 .enable = s3c64xx_pclk_ctrl,
152 .ctrlbit = S3C_CLKCON_PCLK_IIC, 152 .ctrlbit = S3C_CLKCON_PCLK_IIC,
153 }, { 153 }, {
154 .name = "i2c",
155 .id = 1,
156 .parent = &clk_p,
157 .enable = s3c64xx_pclk_ctrl,
158 .ctrlbit = S3C6410_CLKCON_PCLK_I2C1,
159 }, {
154 .name = "iis", 160 .name = "iis",
155 .id = 0, 161 .id = 0,
156 .parent = &clk_p, 162 .parent = &clk_p,
diff --git a/arch/arm/mach-s3c64xx/dma.c b/arch/arm/mach-s3c64xx/dma.c
index 135db1b41252..c35585cf8c4f 100644
--- a/arch/arm/mach-s3c64xx/dma.c
+++ b/arch/arm/mach-s3c64xx/dma.c
@@ -690,12 +690,12 @@ static int s3c64xx_dma_init1(int chno, enum dma_ch chbase,
690 690
691 regptr = regs + PL080_Cx_BASE(0); 691 regptr = regs + PL080_Cx_BASE(0);
692 692
693 for (ch = 0; ch < 8; ch++, chno++, chptr++) { 693 for (ch = 0; ch < 8; ch++, chptr++) {
694 printk(KERN_INFO "%s: registering DMA %d (%p)\n", 694 pr_debug("%s: registering DMA %d (%p)\n",
695 __func__, chno, regptr); 695 __func__, chno + ch, regptr);
696 696
697 chptr->bit = 1 << ch; 697 chptr->bit = 1 << ch;
698 chptr->number = chno; 698 chptr->number = chno + ch;
699 chptr->dmac = dmac; 699 chptr->dmac = dmac;
700 chptr->regs = regptr; 700 chptr->regs = regptr;
701 regptr += PL080_Cx_STRIDE; 701 regptr += PL080_Cx_STRIDE;
@@ -704,7 +704,8 @@ static int s3c64xx_dma_init1(int chno, enum dma_ch chbase,
704 /* for the moment, permanently enable the controller */ 704 /* for the moment, permanently enable the controller */
705 writel(PL080_CONFIG_ENABLE, regs + PL080_CONFIG); 705 writel(PL080_CONFIG_ENABLE, regs + PL080_CONFIG);
706 706
707 printk(KERN_INFO "PL080: IRQ %d, at %p\n", irq, regs); 707 printk(KERN_INFO "PL080: IRQ %d, at %p, channels %d..%d\n",
708 irq, regs, chno, chno+8);
708 709
709 return 0; 710 return 0;
710 711
diff --git a/arch/arm/mach-s3c64xx/gpiolib.c b/arch/arm/mach-s3c64xx/gpiolib.c
index fd99a82e82c4..92b09085caaa 100644
--- a/arch/arm/mach-s3c64xx/gpiolib.c
+++ b/arch/arm/mach-s3c64xx/gpiolib.c
@@ -72,7 +72,7 @@ static struct s3c_gpio_cfg gpio_4bit_cfg_eint0011 = {
72 .get_pull = s3c_gpio_getpull_updown, 72 .get_pull = s3c_gpio_getpull_updown,
73}; 73};
74 74
75int s3c64xx_gpio2int_gpm(struct gpio_chip *chip, unsigned pin) 75static int s3c64xx_gpio2int_gpm(struct gpio_chip *chip, unsigned pin)
76{ 76{
77 return pin < 5 ? IRQ_EINT(23) + pin : -ENXIO; 77 return pin < 5 ? IRQ_EINT(23) + pin : -ENXIO;
78} 78}
@@ -138,7 +138,7 @@ static struct s3c_gpio_chip gpio_4bit[] = {
138 }, 138 },
139}; 139};
140 140
141int s3c64xx_gpio2int_gpl(struct gpio_chip *chip, unsigned pin) 141static int s3c64xx_gpio2int_gpl(struct gpio_chip *chip, unsigned pin)
142{ 142{
143 return pin >= 8 ? IRQ_EINT(16) + pin - 8 : -ENXIO; 143 return pin >= 8 ? IRQ_EINT(16) + pin - 8 : -ENXIO;
144} 144}
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
index e85192a86fbe..a80a3163dd30 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
@@ -28,6 +28,7 @@
28#include <linux/delay.h> 28#include <linux/delay.h>
29#include <linux/smsc911x.h> 29#include <linux/smsc911x.h>
30#include <linux/regulator/fixed.h> 30#include <linux/regulator/fixed.h>
31#include <linux/regulator/machine.h>
31 32
32#ifdef CONFIG_SMDK6410_WM1190_EV1 33#ifdef CONFIG_SMDK6410_WM1190_EV1
33#include <linux/mfd/wm8350/core.h> 34#include <linux/mfd/wm8350/core.h>
@@ -351,7 +352,7 @@ static struct regulator_init_data smdk6410_vddpll = {
351/* VDD_UH_MMC, LDO5 on J5 */ 352/* VDD_UH_MMC, LDO5 on J5 */
352static struct regulator_init_data smdk6410_vdduh_mmc = { 353static struct regulator_init_data smdk6410_vdduh_mmc = {
353 .constraints = { 354 .constraints = {
354 .name = "PVDD_UH/PVDD_MMC", 355 .name = "PVDD_UH+PVDD_MMC",
355 .always_on = 1, 356 .always_on = 1,
356 }, 357 },
357}; 358};
@@ -417,7 +418,7 @@ static struct regulator_init_data smdk6410_vddaudio = {
417/* S3C64xx internal logic & PLL */ 418/* S3C64xx internal logic & PLL */
418static struct regulator_init_data wm8350_dcdc1_data = { 419static struct regulator_init_data wm8350_dcdc1_data = {
419 .constraints = { 420 .constraints = {
420 .name = "PVDD_INT/PVDD_PLL", 421 .name = "PVDD_INT+PVDD_PLL",
421 .min_uV = 1200000, 422 .min_uV = 1200000,
422 .max_uV = 1200000, 423 .max_uV = 1200000,
423 .always_on = 1, 424 .always_on = 1,
@@ -452,7 +453,7 @@ static struct regulator_consumer_supply wm8350_dcdc4_consumers[] = {
452 453
453static struct regulator_init_data wm8350_dcdc4_data = { 454static struct regulator_init_data wm8350_dcdc4_data = {
454 .constraints = { 455 .constraints = {
455 .name = "PVDD_HI/PVDD_EXT/PVDD_SYS/PVCCM2MTV", 456 .name = "PVDD_HI+PVDD_EXT+PVDD_SYS+PVCCM2MTV",
456 .min_uV = 3000000, 457 .min_uV = 3000000,
457 .max_uV = 3000000, 458 .max_uV = 3000000,
458 .always_on = 1, 459 .always_on = 1,
@@ -464,7 +465,7 @@ static struct regulator_init_data wm8350_dcdc4_data = {
464/* OTGi/1190-EV1 HPVDD & AVDD */ 465/* OTGi/1190-EV1 HPVDD & AVDD */
465static struct regulator_init_data wm8350_ldo4_data = { 466static struct regulator_init_data wm8350_ldo4_data = {
466 .constraints = { 467 .constraints = {
467 .name = "PVDD_OTGI/HPVDD/AVDD", 468 .name = "PVDD_OTGI+HPVDD+AVDD",
468 .min_uV = 1200000, 469 .min_uV = 1200000,
469 .max_uV = 1200000, 470 .max_uV = 1200000,
470 .apply_uV = 1, 471 .apply_uV = 1,
@@ -552,7 +553,7 @@ static struct wm831x_backlight_pdata wm1192_backlight_pdata = {
552 553
553static struct regulator_init_data wm1192_dcdc3 = { 554static struct regulator_init_data wm1192_dcdc3 = {
554 .constraints = { 555 .constraints = {
555 .name = "PVDD_MEM/PVDD_GPS", 556 .name = "PVDD_MEM+PVDD_GPS",
556 .always_on = 1, 557 .always_on = 1,
557 }, 558 },
558}; 559};
@@ -563,7 +564,7 @@ static struct regulator_consumer_supply wm1192_ldo1_consumers[] = {
563 564
564static struct regulator_init_data wm1192_ldo1 = { 565static struct regulator_init_data wm1192_ldo1 = {
565 .constraints = { 566 .constraints = {
566 .name = "PVDD_LCD/PVDD_EXT", 567 .name = "PVDD_LCD+PVDD_EXT",
567 .always_on = 1, 568 .always_on = 1,
568 }, 569 },
569 .consumer_supplies = wm1192_ldo1_consumers, 570 .consumer_supplies = wm1192_ldo1_consumers,
diff --git a/arch/arm/mach-s3c64xx/setup-keypad.c b/arch/arm/mach-s3c64xx/setup-keypad.c
index f8ed0d22db70..1d4d0ee9e870 100644
--- a/arch/arm/mach-s3c64xx/setup-keypad.c
+++ b/arch/arm/mach-s3c64xx/setup-keypad.c
@@ -17,7 +17,7 @@
17void samsung_keypad_cfg_gpio(unsigned int rows, unsigned int cols) 17void samsung_keypad_cfg_gpio(unsigned int rows, unsigned int cols)
18{ 18{
19 /* Set all the necessary GPK pins to special-function 3: KP_ROW[x] */ 19 /* Set all the necessary GPK pins to special-function 3: KP_ROW[x] */
20 s3c_gpio_cfgrange_nopull(S3C64XX_GPK(8), 8 + rows, S3C_GPIO_SFN(3)); 20 s3c_gpio_cfgrange_nopull(S3C64XX_GPK(8), rows, S3C_GPIO_SFN(3));
21 21
22 /* Set all the necessary GPL pins to special-function 3: KP_COL[x] */ 22 /* Set all the necessary GPL pins to special-function 3: KP_COL[x] */
23 s3c_gpio_cfgrange_nopull(S3C64XX_GPL(0), cols, S3C_GPIO_SFN(3)); 23 s3c_gpio_cfgrange_nopull(S3C64XX_GPL(0), cols, S3C_GPIO_SFN(3));
diff --git a/arch/arm/mach-s3c64xx/setup-sdhci.c b/arch/arm/mach-s3c64xx/setup-sdhci.c
index 1a942037c4ef..f344a222bc84 100644
--- a/arch/arm/mach-s3c64xx/setup-sdhci.c
+++ b/arch/arm/mach-s3c64xx/setup-sdhci.c
@@ -56,7 +56,7 @@ void s3c6400_setup_sdhci_cfg_card(struct platform_device *dev,
56 else 56 else
57 ctrl3 = (S3C_SDHCI_CTRL3_FCSEL1 | S3C_SDHCI_CTRL3_FCSEL0); 57 ctrl3 = (S3C_SDHCI_CTRL3_FCSEL1 | S3C_SDHCI_CTRL3_FCSEL0);
58 58
59 printk(KERN_INFO "%s: CTRL 2=%08x, 3=%08x\n", __func__, ctrl2, ctrl3); 59 pr_debug("%s: CTRL 2=%08x, 3=%08x\n", __func__, ctrl2, ctrl3);
60 writel(ctrl2, r + S3C_SDHCI_CONTROL2); 60 writel(ctrl2, r + S3C_SDHCI_CONTROL2);
61 writel(ctrl3, r + S3C_SDHCI_CONTROL3); 61 writel(ctrl3, r + S3C_SDHCI_CONTROL3);
62} 62}
diff --git a/arch/arm/mach-s5p64x0/include/mach/gpio.h b/arch/arm/mach-s5p64x0/include/mach/gpio.h
index 5486c8f01f1d..adb5f298ead8 100644
--- a/arch/arm/mach-s5p64x0/include/mach/gpio.h
+++ b/arch/arm/mach-s5p64x0/include/mach/gpio.h
@@ -23,7 +23,7 @@
23#define S5P6440_GPIO_A_NR (6) 23#define S5P6440_GPIO_A_NR (6)
24#define S5P6440_GPIO_B_NR (7) 24#define S5P6440_GPIO_B_NR (7)
25#define S5P6440_GPIO_C_NR (8) 25#define S5P6440_GPIO_C_NR (8)
26#define S5P6440_GPIO_F_NR (2) 26#define S5P6440_GPIO_F_NR (16)
27#define S5P6440_GPIO_G_NR (7) 27#define S5P6440_GPIO_G_NR (7)
28#define S5P6440_GPIO_H_NR (10) 28#define S5P6440_GPIO_H_NR (10)
29#define S5P6440_GPIO_I_NR (16) 29#define S5P6440_GPIO_I_NR (16)
@@ -36,7 +36,7 @@
36#define S5P6450_GPIO_B_NR (7) 36#define S5P6450_GPIO_B_NR (7)
37#define S5P6450_GPIO_C_NR (8) 37#define S5P6450_GPIO_C_NR (8)
38#define S5P6450_GPIO_D_NR (8) 38#define S5P6450_GPIO_D_NR (8)
39#define S5P6450_GPIO_F_NR (2) 39#define S5P6450_GPIO_F_NR (16)
40#define S5P6450_GPIO_G_NR (14) 40#define S5P6450_GPIO_G_NR (14)
41#define S5P6450_GPIO_H_NR (10) 41#define S5P6450_GPIO_H_NR (10)
42#define S5P6450_GPIO_I_NR (16) 42#define S5P6450_GPIO_I_NR (16)
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
index 2123b96b5638..4303a86e6e38 100644
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -454,6 +454,7 @@ static void __init ag5evm_init(void)
454 gpio_direction_output(GPIO_PORT217, 0); 454 gpio_direction_output(GPIO_PORT217, 0);
455 mdelay(1); 455 mdelay(1);
456 gpio_set_value(GPIO_PORT217, 1); 456 gpio_set_value(GPIO_PORT217, 1);
457 mdelay(100);
457 458
458 /* LCD backlight controller */ 459 /* LCD backlight controller */
459 gpio_request(GPIO_PORT235, NULL); /* RESET */ 460 gpio_request(GPIO_PORT235, NULL); /* RESET */
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index 3cf0951caa2d..81d6536552a9 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -1303,7 +1303,7 @@ static void __init ap4evb_init(void)
1303 1303
1304 lcdc_info.clock_source = LCDC_CLK_BUS; 1304 lcdc_info.clock_source = LCDC_CLK_BUS;
1305 lcdc_info.ch[0].interface_type = RGB18; 1305 lcdc_info.ch[0].interface_type = RGB18;
1306 lcdc_info.ch[0].clock_divider = 2; 1306 lcdc_info.ch[0].clock_divider = 3;
1307 lcdc_info.ch[0].flags = 0; 1307 lcdc_info.ch[0].flags = 0;
1308 lcdc_info.ch[0].lcd_size_cfg.width = 152; 1308 lcdc_info.ch[0].lcd_size_cfg.width = 152;
1309 lcdc_info.ch[0].lcd_size_cfg.height = 91; 1309 lcdc_info.ch[0].lcd_size_cfg.height = 91;
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index fb4213a4e15a..1657eac5dde2 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -303,7 +303,7 @@ static struct sh_mobile_lcdc_info lcdc_info = {
303 .lcd_cfg = mackerel_lcdc_modes, 303 .lcd_cfg = mackerel_lcdc_modes,
304 .num_cfg = ARRAY_SIZE(mackerel_lcdc_modes), 304 .num_cfg = ARRAY_SIZE(mackerel_lcdc_modes),
305 .interface_type = RGB24, 305 .interface_type = RGB24,
306 .clock_divider = 2, 306 .clock_divider = 3,
307 .flags = 0, 307 .flags = 0,
308 .lcd_size_cfg.width = 152, 308 .lcd_size_cfg.width = 152,
309 .lcd_size_cfg.height = 91, 309 .lcd_size_cfg.height = 91,
diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c
index ddd4a1b775f0..7e58904c1c8c 100644
--- a/arch/arm/mach-shmobile/clock-sh73a0.c
+++ b/arch/arm/mach-shmobile/clock-sh73a0.c
@@ -263,7 +263,7 @@ static struct clk div6_clks[DIV6_NR] = {
263}; 263};
264 264
265enum { MSTP001, 265enum { MSTP001,
266 MSTP125, MSTP118, MSTP116, MSTP100, 266 MSTP129, MSTP128, MSTP127, MSTP126, MSTP125, MSTP118, MSTP116, MSTP100,
267 MSTP219, 267 MSTP219,
268 MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, 268 MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
269 MSTP331, MSTP329, MSTP325, MSTP323, MSTP312, 269 MSTP331, MSTP329, MSTP325, MSTP323, MSTP312,
@@ -275,6 +275,10 @@ enum { MSTP001,
275 275
276static struct clk mstp_clks[MSTP_NR] = { 276static struct clk mstp_clks[MSTP_NR] = {
277 [MSTP001] = MSTP(&div4_clks[DIV4_HP], SMSTPCR0, 1, 0), /* IIC2 */ 277 [MSTP001] = MSTP(&div4_clks[DIV4_HP], SMSTPCR0, 1, 0), /* IIC2 */
278 [MSTP129] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 29, 0), /* CEU1 */
279 [MSTP128] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 28, 0), /* CSI2-RX1 */
280 [MSTP127] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 27, 0), /* CEU0 */
281 [MSTP126] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 26, 0), /* CSI2-RX0 */
278 [MSTP125] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR1, 25, 0), /* TMU0 */ 282 [MSTP125] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR1, 25, 0), /* TMU0 */
279 [MSTP118] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 18, 0), /* DSITX0 */ 283 [MSTP118] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 18, 0), /* DSITX0 */
280 [MSTP116] = MSTP(&div4_clks[DIV4_HP], SMSTPCR1, 16, 0), /* IIC0 */ 284 [MSTP116] = MSTP(&div4_clks[DIV4_HP], SMSTPCR1, 16, 0), /* IIC0 */
@@ -306,6 +310,9 @@ static struct clk_lookup lookups[] = {
306 CLKDEV_CON_ID("r_clk", &r_clk), 310 CLKDEV_CON_ID("r_clk", &r_clk),
307 311
308 /* DIV6 clocks */ 312 /* DIV6 clocks */
313 CLKDEV_CON_ID("vck1_clk", &div6_clks[DIV6_VCK1]),
314 CLKDEV_CON_ID("vck2_clk", &div6_clks[DIV6_VCK2]),
315 CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]),
309 CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSIT]), 316 CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSIT]),
310 CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.1", &div6_clks[DIV6_DSIT]), 317 CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.1", &div6_clks[DIV6_DSIT]),
311 CLKDEV_ICK_ID("dsi0p_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSI0P]), 318 CLKDEV_ICK_ID("dsi0p_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSI0P]),
@@ -313,11 +320,15 @@ static struct clk_lookup lookups[] = {
313 320
314 /* MSTP32 clocks */ 321 /* MSTP32 clocks */
315 CLKDEV_DEV_ID("i2c-sh_mobile.2", &mstp_clks[MSTP001]), /* I2C2 */ 322 CLKDEV_DEV_ID("i2c-sh_mobile.2", &mstp_clks[MSTP001]), /* I2C2 */
316 CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */ 323 CLKDEV_DEV_ID("sh_mobile_ceu.1", &mstp_clks[MSTP129]), /* CEU1 */
324 CLKDEV_DEV_ID("sh-mobile-csi2.1", &mstp_clks[MSTP128]), /* CSI2-RX1 */
325 CLKDEV_DEV_ID("sh_mobile_ceu.0", &mstp_clks[MSTP127]), /* CEU0 */
326 CLKDEV_DEV_ID("sh-mobile-csi2.0", &mstp_clks[MSTP126]), /* CSI2-RX0 */
317 CLKDEV_DEV_ID("sh_tmu.0", &mstp_clks[MSTP125]), /* TMU00 */ 327 CLKDEV_DEV_ID("sh_tmu.0", &mstp_clks[MSTP125]), /* TMU00 */
318 CLKDEV_DEV_ID("sh_tmu.1", &mstp_clks[MSTP125]), /* TMU01 */ 328 CLKDEV_DEV_ID("sh_tmu.1", &mstp_clks[MSTP125]), /* TMU01 */
319 CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), /* I2C0 */
320 CLKDEV_DEV_ID("sh-mipi-dsi.0", &mstp_clks[MSTP118]), /* DSITX */ 329 CLKDEV_DEV_ID("sh-mipi-dsi.0", &mstp_clks[MSTP118]), /* DSITX */
330 CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), /* I2C0 */
331 CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */
321 CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP219]), /* SCIFA7 */ 332 CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP219]), /* SCIFA7 */
322 CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */ 333 CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */
323 CLKDEV_DEV_ID("sh-sci.8", &mstp_clks[MSTP206]), /* SCIFB */ 334 CLKDEV_DEV_ID("sh-sci.8", &mstp_clks[MSTP206]), /* SCIFB */
diff --git a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
index efd3687ba190..3029aba38688 100644
--- a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
+++ b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
@@ -6,13 +6,10 @@ LIST "RWT Setting"
6EW 0xE6020004, 0xA500 6EW 0xE6020004, 0xA500
7EW 0xE6030004, 0xA500 7EW 0xE6030004, 0xA500
8 8
9DD 0x01001000, 0x01001000
10
11LIST "GPIO Setting" 9LIST "GPIO Setting"
12EB 0xE6051013, 0xA2 10EB 0xE6051013, 0xA2
13 11
14LIST "CPG" 12LIST "CPG"
15ED 0xE6150080, 0x00000180
16ED 0xE61500C0, 0x00000002 13ED 0xE61500C0, 0x00000002
17 14
18WAIT 1, 0xFE40009C 15WAIT 1, 0xFE40009C
@@ -37,6 +34,9 @@ ED 0xE615002C, 0x93000040
37 34
38WAIT 1, 0xFE40009C 35WAIT 1, 0xFE40009C
39 36
37LIST "SUB/USBClk"
38ED 0xE6150080, 0x00000180
39
40LIST "BSC" 40LIST "BSC"
41ED 0xFEC10000, 0x00E0001B 41ED 0xFEC10000, 0x00E0001B
42 42
@@ -53,7 +53,7 @@ ED 0xFE400048, 0x20C18505
53ED 0xFE40004C, 0x00110209 53ED 0xFE40004C, 0x00110209
54ED 0xFE400010, 0x00000087 54ED 0xFE400010, 0x00000087
55 55
56WAIT 10, 0xFE40009C 56WAIT 30, 0xFE40009C
57 57
58ED 0xFE400084, 0x0000003F 58ED 0xFE400084, 0x0000003F
59EB 0xFE500000, 0x00 59EB 0xFE500000, 0x00
@@ -84,7 +84,7 @@ ED 0xE6150004, 0x80331050
84 84
85WAIT 1, 0xFE40009C 85WAIT 1, 0xFE40009C
86 86
87ED 0xE6150354, 0x00000002 87ED 0xFE400354, 0x01AD8002
88 88
89LIST "SCIF0 - Serial port for earlyprintk" 89LIST "SCIF0 - Serial port for earlyprintk"
90EB 0xE6053098, 0x11 90EB 0xE6053098, 0x11
diff --git a/arch/arm/mach-shmobile/include/mach/head-mackerel.txt b/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
index efd3687ba190..3029aba38688 100644
--- a/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
+++ b/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
@@ -6,13 +6,10 @@ LIST "RWT Setting"
6EW 0xE6020004, 0xA500 6EW 0xE6020004, 0xA500
7EW 0xE6030004, 0xA500 7EW 0xE6030004, 0xA500
8 8
9DD 0x01001000, 0x01001000
10
11LIST "GPIO Setting" 9LIST "GPIO Setting"
12EB 0xE6051013, 0xA2 10EB 0xE6051013, 0xA2
13 11
14LIST "CPG" 12LIST "CPG"
15ED 0xE6150080, 0x00000180
16ED 0xE61500C0, 0x00000002 13ED 0xE61500C0, 0x00000002
17 14
18WAIT 1, 0xFE40009C 15WAIT 1, 0xFE40009C
@@ -37,6 +34,9 @@ ED 0xE615002C, 0x93000040
37 34
38WAIT 1, 0xFE40009C 35WAIT 1, 0xFE40009C
39 36
37LIST "SUB/USBClk"
38ED 0xE6150080, 0x00000180
39
40LIST "BSC" 40LIST "BSC"
41ED 0xFEC10000, 0x00E0001B 41ED 0xFEC10000, 0x00E0001B
42 42
@@ -53,7 +53,7 @@ ED 0xFE400048, 0x20C18505
53ED 0xFE40004C, 0x00110209 53ED 0xFE40004C, 0x00110209
54ED 0xFE400010, 0x00000087 54ED 0xFE400010, 0x00000087
55 55
56WAIT 10, 0xFE40009C 56WAIT 30, 0xFE40009C
57 57
58ED 0xFE400084, 0x0000003F 58ED 0xFE400084, 0x0000003F
59EB 0xFE500000, 0x00 59EB 0xFE500000, 0x00
@@ -84,7 +84,7 @@ ED 0xE6150004, 0x80331050
84 84
85WAIT 1, 0xFE40009C 85WAIT 1, 0xFE40009C
86 86
87ED 0xE6150354, 0x00000002 87ED 0xFE400354, 0x01AD8002
88 88
89LIST "SCIF0 - Serial port for earlyprintk" 89LIST "SCIF0 - Serial port for earlyprintk"
90EB 0xE6053098, 0x11 90EB 0xE6053098, 0x11
diff --git a/arch/arm/mach-tegra/include/mach/kbc.h b/arch/arm/mach-tegra/include/mach/kbc.h
index 66ad2760c621..04c779832c78 100644
--- a/arch/arm/mach-tegra/include/mach/kbc.h
+++ b/arch/arm/mach-tegra/include/mach/kbc.h
@@ -57,5 +57,6 @@ struct tegra_kbc_platform_data {
57 const struct matrix_keymap_data *keymap_data; 57 const struct matrix_keymap_data *keymap_data;
58 58
59 bool wakeup; 59 bool wakeup;
60 bool use_fn_map;
60}; 61};
61#endif 62#endif
diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 459b319a9fad..49d3208793e5 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -322,15 +322,18 @@ static void omap_mbox_fini(struct omap_mbox *mbox)
322 322
323struct omap_mbox *omap_mbox_get(const char *name, struct notifier_block *nb) 323struct omap_mbox *omap_mbox_get(const char *name, struct notifier_block *nb)
324{ 324{
325 struct omap_mbox *mbox; 325 struct omap_mbox *_mbox, *mbox = NULL;
326 int ret; 326 int i, ret;
327 327
328 if (!mboxes) 328 if (!mboxes)
329 return ERR_PTR(-EINVAL); 329 return ERR_PTR(-EINVAL);
330 330
331 for (mbox = *mboxes; mbox; mbox++) 331 for (i = 0; (_mbox = mboxes[i]); i++) {
332 if (!strcmp(mbox->name, name)) 332 if (!strcmp(_mbox->name, name)) {
333 mbox = _mbox;
333 break; 334 break;
335 }
336 }
334 337
335 if (!mbox) 338 if (!mbox)
336 return ERR_PTR(-ENOENT); 339 return ERR_PTR(-ENOENT);
diff --git a/arch/arm/plat-samsung/dev-uart.c b/arch/arm/plat-samsung/dev-uart.c
index 3776cd952450..5928105490fa 100644
--- a/arch/arm/plat-samsung/dev-uart.c
+++ b/arch/arm/plat-samsung/dev-uart.c
@@ -15,6 +15,8 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/platform_device.h> 16#include <linux/platform_device.h>
17 17
18#include <plat/devs.h>
19
18/* uart devices */ 20/* uart devices */
19 21
20static struct platform_device s3c24xx_uart_device0 = { 22static struct platform_device s3c24xx_uart_device0 = {
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index c9113619029f..8d73724c0092 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -114,16 +114,14 @@ u32 arch_gettimeoffset(void)
114 114
115/* 115/*
116 * timer_interrupt() needs to keep up the real-time clock, 116 * timer_interrupt() needs to keep up the real-time clock,
117 * as well as call the "do_timer()" routine every clocktick 117 * as well as call the "xtime_update()" routine every clocktick
118 */ 118 */
119#ifdef CONFIG_CORE_TIMER_IRQ_L1 119#ifdef CONFIG_CORE_TIMER_IRQ_L1
120__attribute__((l1_text)) 120__attribute__((l1_text))
121#endif 121#endif
122irqreturn_t timer_interrupt(int irq, void *dummy) 122irqreturn_t timer_interrupt(int irq, void *dummy)
123{ 123{
124 write_seqlock(&xtime_lock); 124 xtime_update(1);
125 do_timer(1);
126 write_sequnlock(&xtime_lock);
127 125
128#ifdef CONFIG_IPIPE 126#ifdef CONFIG_IPIPE
129 update_root_process_times(get_irq_regs()); 127 update_root_process_times(get_irq_regs());
diff --git a/arch/blackfin/lib/outs.S b/arch/blackfin/lib/outs.S
index 250f4d4b9436..06a5e674401f 100644
--- a/arch/blackfin/lib/outs.S
+++ b/arch/blackfin/lib/outs.S
@@ -13,6 +13,8 @@
13.align 2 13.align 2
14 14
15ENTRY(_outsl) 15ENTRY(_outsl)
16 CC = R2 == 0;
17 IF CC JUMP 1f;
16 P0 = R0; /* P0 = port */ 18 P0 = R0; /* P0 = port */
17 P1 = R1; /* P1 = address */ 19 P1 = R1; /* P1 = address */
18 P2 = R2; /* P2 = count */ 20 P2 = R2; /* P2 = count */
@@ -20,10 +22,12 @@ ENTRY(_outsl)
20 LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2; 22 LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
21.Llong_loop_s: R0 = [P1++]; 23.Llong_loop_s: R0 = [P1++];
22.Llong_loop_e: [P0] = R0; 24.Llong_loop_e: [P0] = R0;
23 RTS; 251: RTS;
24ENDPROC(_outsl) 26ENDPROC(_outsl)
25 27
26ENTRY(_outsw) 28ENTRY(_outsw)
29 CC = R2 == 0;
30 IF CC JUMP 1f;
27 P0 = R0; /* P0 = port */ 31 P0 = R0; /* P0 = port */
28 P1 = R1; /* P1 = address */ 32 P1 = R1; /* P1 = address */
29 P2 = R2; /* P2 = count */ 33 P2 = R2; /* P2 = count */
@@ -31,10 +35,12 @@ ENTRY(_outsw)
31 LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2; 35 LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
32.Lword_loop_s: R0 = W[P1++]; 36.Lword_loop_s: R0 = W[P1++];
33.Lword_loop_e: W[P0] = R0; 37.Lword_loop_e: W[P0] = R0;
34 RTS; 381: RTS;
35ENDPROC(_outsw) 39ENDPROC(_outsw)
36 40
37ENTRY(_outsb) 41ENTRY(_outsb)
42 CC = R2 == 0;
43 IF CC JUMP 1f;
38 P0 = R0; /* P0 = port */ 44 P0 = R0; /* P0 = port */
39 P1 = R1; /* P1 = address */ 45 P1 = R1; /* P1 = address */
40 P2 = R2; /* P2 = count */ 46 P2 = R2; /* P2 = count */
@@ -42,10 +48,12 @@ ENTRY(_outsb)
42 LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2; 48 LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
43.Lbyte_loop_s: R0 = B[P1++]; 49.Lbyte_loop_s: R0 = B[P1++];
44.Lbyte_loop_e: B[P0] = R0; 50.Lbyte_loop_e: B[P0] = R0;
45 RTS; 511: RTS;
46ENDPROC(_outsb) 52ENDPROC(_outsb)
47 53
48ENTRY(_outsw_8) 54ENTRY(_outsw_8)
55 CC = R2 == 0;
56 IF CC JUMP 1f;
49 P0 = R0; /* P0 = port */ 57 P0 = R0; /* P0 = port */
50 P1 = R1; /* P1 = address */ 58 P1 = R1; /* P1 = address */
51 P2 = R2; /* P2 = count */ 59 P2 = R2; /* P2 = count */
@@ -56,5 +64,5 @@ ENTRY(_outsw_8)
56 R0 = R0 << 8; 64 R0 = R0 << 8;
57 R0 = R0 + R1; 65 R0 = R0 + R1;
58.Lword8_loop_e: W[P0] = R0; 66.Lword8_loop_e: W[P0] = R0;
59 RTS; 671: RTS;
60ENDPROC(_outsw_8) 68ENDPROC(_outsw_8)
diff --git a/arch/blackfin/mach-common/cache.S b/arch/blackfin/mach-common/cache.S
index 790c767ca95a..ab4a925a443e 100644
--- a/arch/blackfin/mach-common/cache.S
+++ b/arch/blackfin/mach-common/cache.S
@@ -58,6 +58,8 @@
581: 581:
59.ifeqs "\flushins", BROK_FLUSH_INST 59.ifeqs "\flushins", BROK_FLUSH_INST
60 \flushins [P0++]; 60 \flushins [P0++];
61 nop;
62 nop;
612: nop; 632: nop;
62.else 64.else
632: \flushins [P0++]; 652: \flushins [P0++];
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 00eb36f8debf..20c85b5dc7d0 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -140,7 +140,7 @@ stop_watchdog(void)
140 140
141/* 141/*
142 * timer_interrupt() needs to keep up the real-time clock, 142 * timer_interrupt() needs to keep up the real-time clock,
143 * as well as call the "do_timer()" routine every clocktick 143 * as well as call the "xtime_update()" routine every clocktick
144 */ 144 */
145 145
146//static unsigned short myjiff; /* used by our debug routine print_timestamp */ 146//static unsigned short myjiff; /* used by our debug routine print_timestamp */
@@ -176,7 +176,7 @@ timer_interrupt(int irq, void *dev_id)
176 176
177 /* call the real timer interrupt handler */ 177 /* call the real timer interrupt handler */
178 178
179 do_timer(1); 179 xtime_update(1);
180 180
181 cris_do_profile(regs); /* Save profiling information */ 181 cris_do_profile(regs); /* Save profiling information */
182 return IRQ_HANDLED; 182 return IRQ_HANDLED;
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 84fed3b4b079..4c9e3e1ba5d1 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -26,7 +26,9 @@
26#define FLUSH_ALL (void*)0xffffffff 26#define FLUSH_ALL (void*)0xffffffff
27 27
28/* Vector of locks used for various atomic operations */ 28/* Vector of locks used for various atomic operations */
29spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; 29spinlock_t cris_atomic_locks[] = {
30 [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks)
31};
30 32
31/* CPU masks */ 33/* CPU masks */
32cpumask_t phys_cpu_present_map = CPU_MASK_NONE; 34cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index a545211e999d..bb978ede8985 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -183,7 +183,7 @@ void handle_watchdog_bite(struct pt_regs *regs)
183 183
184/* 184/*
185 * timer_interrupt() needs to keep up the real-time clock, 185 * timer_interrupt() needs to keep up the real-time clock,
186 * as well as call the "do_timer()" routine every clocktick. 186 * as well as call the "xtime_update()" routine every clocktick.
187 */ 187 */
188extern void cris_do_profile(struct pt_regs *regs); 188extern void cris_do_profile(struct pt_regs *regs);
189 189
@@ -216,9 +216,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
216 return IRQ_HANDLED; 216 return IRQ_HANDLED;
217 217
218 /* Call the real timer interrupt handler */ 218 /* Call the real timer interrupt handler */
219 write_seqlock(&xtime_lock); 219 xtime_update(1);
220 do_timer(1);
221 write_sequnlock(&xtime_lock);
222 return IRQ_HANDLED; 220 return IRQ_HANDLED;
223} 221}
224 222
diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S
index 442218980db0..c49be845f96a 100644
--- a/arch/cris/kernel/vmlinux.lds.S
+++ b/arch/cris/kernel/vmlinux.lds.S
@@ -72,11 +72,6 @@ SECTIONS
72 INIT_TEXT_SECTION(PAGE_SIZE) 72 INIT_TEXT_SECTION(PAGE_SIZE)
73 .init.data : { INIT_DATA } 73 .init.data : { INIT_DATA }
74 .init.setup : { INIT_SETUP(16) } 74 .init.setup : { INIT_SETUP(16) }
75#ifdef CONFIG_ETRAX_ARCH_V32
76 __start___param = .;
77 __param : { *(__param) }
78 __stop___param = .;
79#endif
80 .initcall.init : { 75 .initcall.init : {
81 INIT_CALLS 76 INIT_CALLS
82 } 77 }
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 08b3d1da3583..4bea27f50a7a 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -7,10 +7,11 @@
7#include <asm/errno.h> 7#include <asm/errno.h>
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr); 10extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
11 11
12static inline int 12static inline int
13futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 13futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
14 u32 oldval, u32 newval)
14{ 15{
15 return -ENOSYS; 16 return -ENOSYS;
16} 17}
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index 14f64b054c7e..d155ca9e5098 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -18,7 +18,7 @@
18 * the various futex operations; MMU fault checking is ignored under no-MMU 18 * the various futex operations; MMU fault checking is ignored under no-MMU
19 * conditions 19 * conditions
20 */ 20 */
21static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_oldval) 21static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr, int *_oldval)
22{ 22{
23 int oldval, ret; 23 int oldval, ret;
24 24
@@ -50,7 +50,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_o
50 return ret; 50 return ret;
51} 51}
52 52
53static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_oldval) 53static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr, int *_oldval)
54{ 54{
55 int oldval, ret; 55 int oldval, ret;
56 56
@@ -83,7 +83,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_o
83 return ret; 83 return ret;
84} 84}
85 85
86static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_oldval) 86static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr, int *_oldval)
87{ 87{
88 int oldval, ret; 88 int oldval, ret;
89 89
@@ -116,7 +116,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_ol
116 return ret; 116 return ret;
117} 117}
118 118
119static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_oldval) 119static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr, int *_oldval)
120{ 120{
121 int oldval, ret; 121 int oldval, ret;
122 122
@@ -149,7 +149,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_o
149 return ret; 149 return ret;
150} 150}
151 151
152static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_oldval) 152static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_oldval)
153{ 153{
154 int oldval, ret; 154 int oldval, ret;
155 155
@@ -186,7 +186,7 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_o
186/* 186/*
187 * do the futex operations 187 * do the futex operations
188 */ 188 */
189int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 189int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
190{ 190{
191 int op = (encoded_op >> 28) & 7; 191 int op = (encoded_op >> 28) & 7;
192 int cmp = (encoded_op >> 24) & 15; 192 int cmp = (encoded_op >> 24) & 15;
@@ -197,7 +197,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
197 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 197 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
198 oparg = 1 << oparg; 198 oparg = 1 << oparg;
199 199
200 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 200 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
201 return -EFAULT; 201 return -EFAULT;
202 202
203 pagefault_disable(); 203 pagefault_disable();
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 0ddbbae83cb2..b457de496b70 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -50,21 +50,13 @@ static struct irqaction timer_irq = {
50 50
51/* 51/*
52 * timer_interrupt() needs to keep up the real-time clock, 52 * timer_interrupt() needs to keep up the real-time clock,
53 * as well as call the "do_timer()" routine every clocktick 53 * as well as call the "xtime_update()" routine every clocktick
54 */ 54 */
55static irqreturn_t timer_interrupt(int irq, void *dummy) 55static irqreturn_t timer_interrupt(int irq, void *dummy)
56{ 56{
57 profile_tick(CPU_PROFILING); 57 profile_tick(CPU_PROFILING);
58 /*
59 * Here we are in the timer irq handler. We just have irqs locally
60 * disabled but we don't know if the timer_bh is running on the other
61 * CPU. We need to avoid to SMP race with it. NOTE: we don't need
62 * the irq version of write_lock because as just said we have irq
63 * locally disabled. -arca
64 */
65 write_seqlock(&xtime_lock);
66 58
67 do_timer(1); 59 xtime_update(1);
68 60
69#ifdef CONFIG_HEARTBEAT 61#ifdef CONFIG_HEARTBEAT
70 static unsigned short n; 62 static unsigned short n;
@@ -72,8 +64,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy)
72 __set_LEDS(n); 64 __set_LEDS(n);
73#endif /* CONFIG_HEARTBEAT */ 65#endif /* CONFIG_HEARTBEAT */
74 66
75 write_sequnlock(&xtime_lock);
76
77 update_process_times(user_mode(get_irq_regs())); 67 update_process_times(user_mode(get_irq_regs()));
78 68
79 return IRQ_HANDLED; 69 return IRQ_HANDLED;
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
index 165005aff9df..32263a138aa6 100644
--- a/arch/h8300/kernel/time.c
+++ b/arch/h8300/kernel/time.c
@@ -35,9 +35,7 @@ void h8300_timer_tick(void)
35{ 35{
36 if (current->pid) 36 if (current->pid)
37 profile_tick(CPU_PROFILING); 37 profile_tick(CPU_PROFILING);
38 write_seqlock(&xtime_lock); 38 xtime_update(1);
39 do_timer(1);
40 write_sequnlock(&xtime_lock);
41 update_process_times(user_mode(get_irq_regs())); 39 update_process_times(user_mode(get_irq_regs()));
42} 40}
43 41
diff --git a/arch/h8300/kernel/timer/timer8.c b/arch/h8300/kernel/timer/timer8.c
index 3946c0fa8374..7a1533fad47d 100644
--- a/arch/h8300/kernel/timer/timer8.c
+++ b/arch/h8300/kernel/timer/timer8.c
@@ -61,7 +61,7 @@
61 61
62/* 62/*
63 * timer_interrupt() needs to keep up the real-time clock, 63 * timer_interrupt() needs to keep up the real-time clock,
64 * as well as call the "do_timer()" routine every clocktick 64 * as well as call the "xtime_update()" routine every clocktick
65 */ 65 */
66 66
67static irqreturn_t timer_interrupt(int irq, void *dev_id) 67static irqreturn_t timer_interrupt(int irq, void *dev_id)
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index c7f0f062239c..8428525ddb22 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -46,7 +46,7 @@ do { \
46} while (0) 46} while (0)
47 47
48static inline int 48static inline int
49futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 49futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
50{ 50{
51 int op = (encoded_op >> 28) & 7; 51 int op = (encoded_op >> 28) & 7;
52 int cmp = (encoded_op >> 24) & 15; 52 int cmp = (encoded_op >> 24) & 15;
@@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
56 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 56 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
57 oparg = 1 << oparg; 57 oparg = 1 << oparg;
58 58
59 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 59 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
60 return -EFAULT; 60 return -EFAULT;
61 61
62 pagefault_disable(); 62 pagefault_disable();
@@ -100,23 +100,26 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
100} 100}
101 101
102static inline int 102static inline int
103futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 103futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
104 u32 oldval, u32 newval)
104{ 105{
105 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 106 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
106 return -EFAULT; 107 return -EFAULT;
107 108
108 { 109 {
109 register unsigned long r8 __asm ("r8"); 110 register unsigned long r8 __asm ("r8") = 0;
111 unsigned long prev;
110 __asm__ __volatile__( 112 __asm__ __volatile__(
111 " mf;; \n" 113 " mf;; \n"
112 " mov ar.ccv=%3;; \n" 114 " mov ar.ccv=%3;; \n"
113 "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n" 115 "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n"
114 " .xdata4 \"__ex_table\", 1b-., 2f-. \n" 116 " .xdata4 \"__ex_table\", 1b-., 2f-. \n"
115 "[2:]" 117 "[2:]"
116 : "=r" (r8) 118 : "=r" (prev)
117 : "r" (uaddr), "r" (newval), 119 : "r" (uaddr), "r" (newval),
118 "rO" ((long) (unsigned) oldval) 120 "rO" ((long) (unsigned) oldval)
119 : "memory"); 121 : "memory");
122 *uval = prev;
120 return r8; 123 return r8;
121 } 124 }
122} 125}
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 215d5454c7d3..3027e7516d85 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -25,20 +25,8 @@
25#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." 25#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
26#endif 26#endif
27 27
28#include <linux/list.h>
29#include <linux/spinlock.h>
30
31#include <asm/intrinsics.h> 28#include <asm/intrinsics.h>
32 29
33/*
34 * the semaphore definition
35 */
36struct rw_semaphore {
37 signed long count;
38 spinlock_t wait_lock;
39 struct list_head wait_list;
40};
41
42#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) 30#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
43#define RWSEM_ACTIVE_BIAS (1L) 31#define RWSEM_ACTIVE_BIAS (1L)
44#define RWSEM_ACTIVE_MASK (0xffffffffL) 32#define RWSEM_ACTIVE_MASK (0xffffffffL)
@@ -46,26 +34,6 @@ struct rw_semaphore {
46#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 34#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
47#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 35#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
48 36
49#define __RWSEM_INITIALIZER(name) \
50 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
51 LIST_HEAD_INIT((name).wait_list) }
52
53#define DECLARE_RWSEM(name) \
54 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
55
56extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
57extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
58extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
59extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
60
61static inline void
62init_rwsem (struct rw_semaphore *sem)
63{
64 sem->count = RWSEM_UNLOCKED_VALUE;
65 spin_lock_init(&sem->wait_lock);
66 INIT_LIST_HEAD(&sem->wait_list);
67}
68
69/* 37/*
70 * lock for reading 38 * lock for reading
71 */ 39 */
@@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem)
174#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) 142#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
175#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) 143#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
176 144
177static inline int rwsem_is_locked(struct rw_semaphore *sem)
178{
179 return (sem->count != 0);
180}
181
182#endif /* _ASM_IA64_RWSEM_H */ 145#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
index 96fc62366aa4..ed28bcd5bb85 100644
--- a/arch/ia64/include/asm/xen/hypercall.h
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
107static inline int 107static inline int
108xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg) 108xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
109{ 109{
110 return _hypercall2(int, sched_op_new, cmd, arg); 110 return _hypercall2(int, sched_op, cmd, arg);
111} 111}
112 112
113static inline long 113static inline long
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 9702fa92489e..156ad803d5b7 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -190,19 +190,10 @@ timer_interrupt (int irq, void *dev_id)
190 190
191 new_itm += local_cpu_data->itm_delta; 191 new_itm += local_cpu_data->itm_delta;
192 192
193 if (smp_processor_id() == time_keeper_id) { 193 if (smp_processor_id() == time_keeper_id)
194 /* 194 xtime_update(1);
195 * Here we are in the timer irq handler. We have irqs locally 195
196 * disabled, but we don't know if the timer_bh is running on 196 local_cpu_data->itm_next = new_itm;
197 * another CPU. We need to avoid to SMP race by acquiring the
198 * xtime_lock.
199 */
200 write_seqlock(&xtime_lock);
201 do_timer(1);
202 local_cpu_data->itm_next = new_itm;
203 write_sequnlock(&xtime_lock);
204 } else
205 local_cpu_data->itm_next = new_itm;
206 197
207 if (time_after(new_itm, ia64_get_itc())) 198 if (time_after(new_itm, ia64_get_itc()))
208 break; 199 break;
@@ -222,7 +213,7 @@ skip_process_time_accounting:
222 * comfort, we increase the safety margin by 213 * comfort, we increase the safety margin by
223 * intentionally dropping the next tick(s). We do NOT 214 * intentionally dropping the next tick(s). We do NOT
224 * update itm.next because that would force us to call 215 * update itm.next because that would force us to call
225 * do_timer() which in turn would let our clock run 216 * xtime_update() which in turn would let our clock run
226 * too fast (with the potentially devastating effect 217 * too fast (with the potentially devastating effect
227 * of losing monotony of time). 218 * of losing monotony of time).
228 */ 219 */
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
index fd66b048c6fa..419c8620945a 100644
--- a/arch/ia64/xen/suspend.c
+++ b/arch/ia64/xen/suspend.c
@@ -37,19 +37,14 @@ xen_mm_unpin_all(void)
37 /* nothing */ 37 /* nothing */
38} 38}
39 39
40void xen_pre_device_suspend(void)
41{
42 /* nothing */
43}
44
45void 40void
46xen_pre_suspend() 41xen_arch_pre_suspend()
47{ 42{
48 /* nothing */ 43 /* nothing */
49} 44}
50 45
51void 46void
52xen_post_suspend(int suspend_cancelled) 47xen_arch_post_suspend(int suspend_cancelled)
53{ 48{
54 if (suspend_cancelled) 49 if (suspend_cancelled)
55 return; 50 return;
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index c1c544513e8d..1f8244a78bee 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -139,14 +139,11 @@ consider_steal_time(unsigned long new_itm)
139 run_posix_cpu_timers(p); 139 run_posix_cpu_timers(p);
140 delta_itm += local_cpu_data->itm_delta * (stolen + blocked); 140 delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
141 141
142 if (cpu == time_keeper_id) { 142 if (cpu == time_keeper_id)
143 write_seqlock(&xtime_lock); 143 xtime_update(stolen + blocked);
144 do_timer(stolen + blocked); 144
145 local_cpu_data->itm_next = delta_itm + new_itm; 145 local_cpu_data->itm_next = delta_itm + new_itm;
146 write_sequnlock(&xtime_lock); 146
147 } else {
148 local_cpu_data->itm_next = delta_itm + new_itm;
149 }
150 per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen; 147 per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
151 per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked; 148 per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
152 } 149 }
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index bda86820bffd..84dd04048db9 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -107,15 +107,14 @@ u32 arch_gettimeoffset(void)
107 107
108/* 108/*
109 * timer_interrupt() needs to keep up the real-time clock, 109 * timer_interrupt() needs to keep up the real-time clock,
110 * as well as call the "do_timer()" routine every clocktick 110 * as well as call the "xtime_update()" routine every clocktick
111 */ 111 */
112static irqreturn_t timer_interrupt(int irq, void *dev_id) 112static irqreturn_t timer_interrupt(int irq, void *dev_id)
113{ 113{
114#ifndef CONFIG_SMP 114#ifndef CONFIG_SMP
115 profile_tick(CPU_PROFILING); 115 profile_tick(CPU_PROFILING);
116#endif 116#endif
117 /* XXX FIXME. Uh, the xtime_lock should be held here, no? */ 117 xtime_update(1);
118 do_timer(1);
119 118
120#ifndef CONFIG_SMP 119#ifndef CONFIG_SMP
121 update_process_times(user_mode(get_irq_regs())); 120 update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 9fe6fefb5e14..1edd95095cb4 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -45,8 +45,8 @@ extern int bvme6000_set_clock_mmss (unsigned long);
45extern void bvme6000_reset (void); 45extern void bvme6000_reset (void);
46void bvme6000_set_vectors (void); 46void bvme6000_set_vectors (void);
47 47
48/* Save tick handler routine pointer, will point to do_timer() in 48/* Save tick handler routine pointer, will point to xtime_update() in
49 * kernel/sched.c, called via bvme6000_process_int() */ 49 * kernel/timer/timekeeping.c, called via bvme6000_process_int() */
50 50
51static irq_handler_t tick_handler; 51static irq_handler_t tick_handler;
52 52
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 06438dac08ff..18b34ee5db3b 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -37,11 +37,11 @@ static inline int set_rtc_mmss(unsigned long nowtime)
37 37
38/* 38/*
39 * timer_interrupt() needs to keep up the real-time clock, 39 * timer_interrupt() needs to keep up the real-time clock,
40 * as well as call the "do_timer()" routine every clocktick 40 * as well as call the "xtime_update()" routine every clocktick
41 */ 41 */
42static irqreturn_t timer_interrupt(int irq, void *dummy) 42static irqreturn_t timer_interrupt(int irq, void *dummy)
43{ 43{
44 do_timer(1); 44 xtime_update(1);
45 update_process_times(user_mode(get_irq_regs())); 45 update_process_times(user_mode(get_irq_regs()));
46 profile_tick(CPU_PROFILING); 46 profile_tick(CPU_PROFILING);
47 47
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 100baaa692a1..6cb9c3a9b6c9 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -46,8 +46,8 @@ extern void mvme147_reset (void);
46 46
47static int bcd2int (unsigned char b); 47static int bcd2int (unsigned char b);
48 48
49/* Save tick handler routine pointer, will point to do_timer() in 49/* Save tick handler routine pointer, will point to xtime_update() in
50 * kernel/sched.c, called via mvme147_process_int() */ 50 * kernel/time/timekeeping.c, called via mvme147_process_int() */
51 51
52irq_handler_t tick_handler; 52irq_handler_t tick_handler;
53 53
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 11edf61cc2c4..0b28e2621653 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -51,8 +51,8 @@ extern void mvme16x_reset (void);
51 51
52int bcd2int (unsigned char b); 52int bcd2int (unsigned char b);
53 53
54/* Save tick handler routine pointer, will point to do_timer() in 54/* Save tick handler routine pointer, will point to xtime_update() in
55 * kernel/sched.c, called via mvme16x_process_int() */ 55 * kernel/time/timekeeping.c, called via mvme16x_process_int() */
56 56
57static irq_handler_t tick_handler; 57static irq_handler_t tick_handler;
58 58
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 2d9e21bd313a..6464ad3ae3e6 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -66,7 +66,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id)
66#ifdef CONFIG_SUN3 66#ifdef CONFIG_SUN3
67 intersil_clear(); 67 intersil_clear();
68#endif 68#endif
69 do_timer(1); 69 xtime_update(1);
70 update_process_times(user_mode(get_irq_regs())); 70 update_process_times(user_mode(get_irq_regs()));
71 if (!(kstat_cpu(0).irqs[irq] % 20)) 71 if (!(kstat_cpu(0).irqs[irq] % 20))
72 sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]); 72 sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]);
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index d6ac2a43453c..6623909f70e6 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -36,7 +36,7 @@ static inline int set_rtc_mmss(unsigned long nowtime)
36#ifndef CONFIG_GENERIC_CLOCKEVENTS 36#ifndef CONFIG_GENERIC_CLOCKEVENTS
37/* 37/*
38 * timer_interrupt() needs to keep up the real-time clock, 38 * timer_interrupt() needs to keep up the real-time clock,
39 * as well as call the "do_timer()" routine every clocktick 39 * as well as call the "xtime_update()" routine every clocktick
40 */ 40 */
41irqreturn_t arch_timer_interrupt(int irq, void *dummy) 41irqreturn_t arch_timer_interrupt(int irq, void *dummy)
42{ 42{
@@ -44,11 +44,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
44 if (current->pid) 44 if (current->pid)
45 profile_tick(CPU_PROFILING); 45 profile_tick(CPU_PROFILING);
46 46
47 write_seqlock(&xtime_lock); 47 xtime_update(1);
48
49 do_timer(1);
50
51 write_sequnlock(&xtime_lock);
52 48
53 update_process_times(user_mode(get_irq_regs())); 49 update_process_times(user_mode(get_irq_regs()));
54 50
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h
index ad3fd61b2fe7..b0526d2716fa 100644
--- a/arch/microblaze/include/asm/futex.h
+++ b/arch/microblaze/include/asm/futex.h
@@ -29,7 +29,7 @@
29}) 29})
30 30
31static inline int 31static inline int
32futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 32futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
33{ 33{
34 int op = (encoded_op >> 28) & 7; 34 int op = (encoded_op >> 28) & 7;
35 int cmp = (encoded_op >> 24) & 15; 35 int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 39 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
40 oparg = 1 << oparg; 40 oparg = 1 << oparg;
41 41
42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 42 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
43 return -EFAULT; 43 return -EFAULT;
44 44
45 pagefault_disable(); 45 pagefault_disable();
@@ -94,31 +94,34 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
94} 94}
95 95
96static inline int 96static inline int
97futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 97futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
98 u32 oldval, u32 newval)
98{ 99{
99 int prev, cmp; 100 int ret = 0, cmp;
101 u32 prev;
100 102
101 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 103 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
102 return -EFAULT; 104 return -EFAULT;
103 105
104 __asm__ __volatile__ ("1: lwx %0, %2, r0; \ 106 __asm__ __volatile__ ("1: lwx %1, %3, r0; \
105 cmp %1, %0, %3; \ 107 cmp %2, %1, %4; \
106 beqi %1, 3f; \ 108 beqi %2, 3f; \
107 2: swx %4, %2, r0; \ 109 2: swx %5, %3, r0; \
108 addic %1, r0, 0; \ 110 addic %2, r0, 0; \
109 bnei %1, 1b; \ 111 bnei %2, 1b; \
110 3: \ 112 3: \
111 .section .fixup,\"ax\"; \ 113 .section .fixup,\"ax\"; \
112 4: brid 3b; \ 114 4: brid 3b; \
113 addik %0, r0, %5; \ 115 addik %0, r0, %6; \
114 .previous; \ 116 .previous; \
115 .section __ex_table,\"a\"; \ 117 .section __ex_table,\"a\"; \
116 .word 1b,4b,2b,4b; \ 118 .word 1b,4b,2b,4b; \
117 .previous;" \ 119 .previous;" \
118 : "=&r" (prev), "=&r"(cmp) \ 120 : "+r" (ret), "=&r" (prev), "=&r"(cmp) \
119 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)); 121 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT));
120 122
121 return prev; 123 *uval = prev;
124 return ret;
122} 125}
123 126
124#endif /* __KERNEL__ */ 127#endif /* __KERNEL__ */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f5ecc0566bc2..d88983516e26 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
4 select HAVE_GENERIC_DMA_COHERENT 4 select HAVE_GENERIC_DMA_COHERENT
5 select HAVE_IDE 5 select HAVE_IDE
6 select HAVE_OPROFILE 6 select HAVE_OPROFILE
7 select HAVE_IRQ_WORK
7 select HAVE_PERF_EVENTS 8 select HAVE_PERF_EVENTS
8 select PERF_USE_VMALLOC 9 select PERF_USE_VMALLOC
9 select HAVE_ARCH_KGDB 10 select HAVE_ARCH_KGDB
@@ -208,6 +209,7 @@ config MACH_JZ4740
208 select ARCH_REQUIRE_GPIOLIB 209 select ARCH_REQUIRE_GPIOLIB
209 select SYS_HAS_EARLY_PRINTK 210 select SYS_HAS_EARLY_PRINTK
210 select HAVE_PWM 211 select HAVE_PWM
212 select HAVE_CLK
211 213
212config LASAT 214config LASAT
213 bool "LASAT Networks platforms" 215 bool "LASAT Networks platforms"
@@ -333,6 +335,8 @@ config PNX8550_STB810
333config PMC_MSP 335config PMC_MSP
334 bool "PMC-Sierra MSP chipsets" 336 bool "PMC-Sierra MSP chipsets"
335 depends on EXPERIMENTAL 337 depends on EXPERIMENTAL
338 select CEVT_R4K
339 select CSRC_R4K
336 select DMA_NONCOHERENT 340 select DMA_NONCOHERENT
337 select SWAP_IO_SPACE 341 select SWAP_IO_SPACE
338 select NO_EXCEPT_FILL 342 select NO_EXCEPT_FILL
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index 6398fa95905c..40b84b991191 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -54,8 +54,8 @@ int mtx1_pci_idsel(unsigned int devsel, int assert);
54 54
55static void mtx1_reset(char *c) 55static void mtx1_reset(char *c)
56{ 56{
57 /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ 57 /* Jump to the reset vector */
58 au_writel(0x00000000, 0xAE00001C); 58 __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
59} 59}
60 60
61static void mtx1_power_off(void) 61static void mtx1_power_off(void)
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index e30e42add697..956f946218c5 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -28,6 +28,8 @@
28#include <linux/mtd/physmap.h> 28#include <linux/mtd/physmap.h>
29#include <mtd/mtd-abi.h> 29#include <mtd/mtd-abi.h>
30 30
31#include <asm/mach-au1x00/au1xxx_eth.h>
32
31static struct gpio_keys_button mtx1_gpio_button[] = { 33static struct gpio_keys_button mtx1_gpio_button[] = {
32 { 34 {
33 .gpio = 207, 35 .gpio = 207,
@@ -140,10 +142,17 @@ static struct __initdata platform_device * mtx1_devs[] = {
140 &mtx1_mtd, 142 &mtx1_mtd,
141}; 143};
142 144
145static struct au1000_eth_platform_data mtx1_au1000_eth0_pdata = {
146 .phy_search_highest_addr = 1,
147 .phy1_search_mac0 = 1,
148};
149
143static int __init mtx1_register_devices(void) 150static int __init mtx1_register_devices(void)
144{ 151{
145 int rc; 152 int rc;
146 153
154 au1xxx_override_eth_cfg(0, &mtx1_au1000_eth0_pdata);
155
147 rc = gpio_request(mtx1_gpio_button[0].gpio, 156 rc = gpio_request(mtx1_gpio_button[0].gpio,
148 mtx1_gpio_button[0].desc); 157 mtx1_gpio_button[0].desc);
149 if (rc < 0) { 158 if (rc < 0) {
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index b43c918925d3..80c521e5290d 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -36,8 +36,8 @@
36 36
37static void xxs1500_reset(char *c) 37static void xxs1500_reset(char *c)
38{ 38{
39 /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */ 39 /* Jump to the reset vector */
40 au_writel(0x00000000, 0xAE00001C); 40 __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
41} 41}
42 42
43static void xxs1500_power_off(void) 43static void xxs1500_power_off(void)
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index b9cce90346cf..6ebf1734b411 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -75,7 +75,7 @@
75} 75}
76 76
77static inline int 77static inline int
78futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 78futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
79{ 79{
80 int op = (encoded_op >> 28) & 7; 80 int op = (encoded_op >> 28) & 7;
81 int cmp = (encoded_op >> 24) & 15; 81 int cmp = (encoded_op >> 24) & 15;
@@ -85,7 +85,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
85 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 85 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
86 oparg = 1 << oparg; 86 oparg = 1 << oparg;
87 87
88 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 88 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
89 return -EFAULT; 89 return -EFAULT;
90 90
91 pagefault_disable(); 91 pagefault_disable();
@@ -132,11 +132,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
132} 132}
133 133
134static inline int 134static inline int
135futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 135futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
136 u32 oldval, u32 newval)
136{ 137{
137 int retval; 138 int ret = 0;
139 u32 val;
138 140
139 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 141 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
140 return -EFAULT; 142 return -EFAULT;
141 143
142 if (cpu_has_llsc && R10000_LLSC_WAR) { 144 if (cpu_has_llsc && R10000_LLSC_WAR) {
@@ -145,25 +147,25 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
145 " .set push \n" 147 " .set push \n"
146 " .set noat \n" 148 " .set noat \n"
147 " .set mips3 \n" 149 " .set mips3 \n"
148 "1: ll %0, %2 \n" 150 "1: ll %1, %3 \n"
149 " bne %0, %z3, 3f \n" 151 " bne %1, %z4, 3f \n"
150 " .set mips0 \n" 152 " .set mips0 \n"
151 " move $1, %z4 \n" 153 " move $1, %z5 \n"
152 " .set mips3 \n" 154 " .set mips3 \n"
153 "2: sc $1, %1 \n" 155 "2: sc $1, %2 \n"
154 " beqzl $1, 1b \n" 156 " beqzl $1, 1b \n"
155 __WEAK_LLSC_MB 157 __WEAK_LLSC_MB
156 "3: \n" 158 "3: \n"
157 " .set pop \n" 159 " .set pop \n"
158 " .section .fixup,\"ax\" \n" 160 " .section .fixup,\"ax\" \n"
159 "4: li %0, %5 \n" 161 "4: li %0, %6 \n"
160 " j 3b \n" 162 " j 3b \n"
161 " .previous \n" 163 " .previous \n"
162 " .section __ex_table,\"a\" \n" 164 " .section __ex_table,\"a\" \n"
163 " "__UA_ADDR "\t1b, 4b \n" 165 " "__UA_ADDR "\t1b, 4b \n"
164 " "__UA_ADDR "\t2b, 4b \n" 166 " "__UA_ADDR "\t2b, 4b \n"
165 " .previous \n" 167 " .previous \n"
166 : "=&r" (retval), "=R" (*uaddr) 168 : "+r" (ret), "=&r" (val), "=R" (*uaddr)
167 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) 169 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
168 : "memory"); 170 : "memory");
169 } else if (cpu_has_llsc) { 171 } else if (cpu_has_llsc) {
@@ -172,31 +174,32 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
172 " .set push \n" 174 " .set push \n"
173 " .set noat \n" 175 " .set noat \n"
174 " .set mips3 \n" 176 " .set mips3 \n"
175 "1: ll %0, %2 \n" 177 "1: ll %1, %3 \n"
176 " bne %0, %z3, 3f \n" 178 " bne %1, %z4, 3f \n"
177 " .set mips0 \n" 179 " .set mips0 \n"
178 " move $1, %z4 \n" 180 " move $1, %z5 \n"
179 " .set mips3 \n" 181 " .set mips3 \n"
180 "2: sc $1, %1 \n" 182 "2: sc $1, %2 \n"
181 " beqz $1, 1b \n" 183 " beqz $1, 1b \n"
182 __WEAK_LLSC_MB 184 __WEAK_LLSC_MB
183 "3: \n" 185 "3: \n"
184 " .set pop \n" 186 " .set pop \n"
185 " .section .fixup,\"ax\" \n" 187 " .section .fixup,\"ax\" \n"
186 "4: li %0, %5 \n" 188 "4: li %0, %6 \n"
187 " j 3b \n" 189 " j 3b \n"
188 " .previous \n" 190 " .previous \n"
189 " .section __ex_table,\"a\" \n" 191 " .section __ex_table,\"a\" \n"
190 " "__UA_ADDR "\t1b, 4b \n" 192 " "__UA_ADDR "\t1b, 4b \n"
191 " "__UA_ADDR "\t2b, 4b \n" 193 " "__UA_ADDR "\t2b, 4b \n"
192 " .previous \n" 194 " .previous \n"
193 : "=&r" (retval), "=R" (*uaddr) 195 : "+r" (ret), "=&r" (val), "=R" (*uaddr)
194 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) 196 : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
195 : "memory"); 197 : "memory");
196 } else 198 } else
197 return -ENOSYS; 199 return -ENOSYS;
198 200
199 return retval; 201 *uval = val;
202 return ret;
200} 203}
201 204
202#endif 205#endif
diff --git a/arch/mips/include/asm/perf_event.h b/arch/mips/include/asm/perf_event.h
index e00007cf8162..d0c77496c728 100644
--- a/arch/mips/include/asm/perf_event.h
+++ b/arch/mips/include/asm/perf_event.h
@@ -11,15 +11,5 @@
11 11
12#ifndef __MIPS_PERF_EVENT_H__ 12#ifndef __MIPS_PERF_EVENT_H__
13#define __MIPS_PERF_EVENT_H__ 13#define __MIPS_PERF_EVENT_H__
14 14/* Leave it empty here. The file is required by linux/perf_event.h */
15/*
16 * MIPS performance counters do not raise NMI upon overflow, a regular
17 * interrupt will be signaled. Hence we can do the pending perf event
18 * work at the tail of the irq handler.
19 */
20static inline void
21set_perf_event_pending(void)
22{
23}
24
25#endif /* __MIPS_PERF_EVENT_H__ */ 15#endif /* __MIPS_PERF_EVENT_H__ */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 5a84a1f11231..94ca2b018af7 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -17,29 +17,13 @@
17#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
18#include <asm/uasm.h> 18#include <asm/uasm.h>
19 19
20/* 20#include <asm-generic/sections.h>
21 * If the Instruction Pointer is in module space (0xc0000000), return true;
22 * otherwise, it is in kernel space (0x80000000), return false.
23 *
24 * FIXME: This will not work when the kernel space and module space are the
25 * same. If they are the same, we need to modify scripts/recordmcount.pl,
26 * ftrace_make_nop/call() and the other related parts to ensure the
27 * enabling/disabling of the calling site to _mcount is right for both kernel
28 * and module.
29 */
30
31static inline int in_module(unsigned long ip)
32{
33 return ip & 0x40000000;
34}
35 21
36#ifdef CONFIG_DYNAMIC_FTRACE 22#ifdef CONFIG_DYNAMIC_FTRACE
37 23
38#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ 24#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
39#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */ 25#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */
40 26
41#define INSN_B_1F_4 0x10000004 /* b 1f; offset = 4 */
42#define INSN_B_1F_5 0x10000005 /* b 1f; offset = 5 */
43#define INSN_NOP 0x00000000 /* nop */ 27#define INSN_NOP 0x00000000 /* nop */
44#define INSN_JAL(addr) \ 28#define INSN_JAL(addr) \
45 ((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK))) 29 ((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK)))
@@ -69,6 +53,20 @@ static inline void ftrace_dyn_arch_init_insns(void)
69#endif 53#endif
70} 54}
71 55
56/*
57 * Check if the address is in kernel space
58 *
59 * Clone core_kernel_text() from kernel/extable.c, but doesn't call
60 * init_kernel_text() for Ftrace doesn't trace functions in init sections.
61 */
62static inline int in_kernel_space(unsigned long ip)
63{
64 if (ip >= (unsigned long)_stext &&
65 ip <= (unsigned long)_etext)
66 return 1;
67 return 0;
68}
69
72static int ftrace_modify_code(unsigned long ip, unsigned int new_code) 70static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
73{ 71{
74 int faulted; 72 int faulted;
@@ -84,6 +82,42 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
84 return 0; 82 return 0;
85} 83}
86 84
85/*
86 * The details about the calling site of mcount on MIPS
87 *
88 * 1. For kernel:
89 *
90 * move at, ra
91 * jal _mcount --> nop
92 *
93 * 2. For modules:
94 *
95 * 2.1 For KBUILD_MCOUNT_RA_ADDRESS and CONFIG_32BIT
96 *
97 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
98 * addiu v1, v1, low_16bit_of_mcount
99 * move at, ra
100 * move $12, ra_address
101 * jalr v1
102 * sub sp, sp, 8
103 * 1: offset = 5 instructions
104 * 2.2 For the Other situations
105 *
106 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
107 * addiu v1, v1, low_16bit_of_mcount
108 * move at, ra
109 * jalr v1
110 * nop | move $12, ra_address | sub sp, sp, 8
111 * 1: offset = 4 instructions
112 */
113
114#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
115#define MCOUNT_OFFSET_INSNS 5
116#else
117#define MCOUNT_OFFSET_INSNS 4
118#endif
119#define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS)
120
87int ftrace_make_nop(struct module *mod, 121int ftrace_make_nop(struct module *mod,
88 struct dyn_ftrace *rec, unsigned long addr) 122 struct dyn_ftrace *rec, unsigned long addr)
89{ 123{
@@ -91,39 +125,11 @@ int ftrace_make_nop(struct module *mod,
91 unsigned long ip = rec->ip; 125 unsigned long ip = rec->ip;
92 126
93 /* 127 /*
94 * We have compiled module with -mlong-calls, but compiled the kernel 128 * If ip is in kernel space, no long call, otherwise, long call is
95 * without it, we need to cope with them respectively. 129 * needed.
96 */ 130 */
97 if (in_module(ip)) { 131 new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
98#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) 132
99 /*
100 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
101 * addiu v1, v1, low_16bit_of_mcount
102 * move at, ra
103 * move $12, ra_address
104 * jalr v1
105 * sub sp, sp, 8
106 * 1: offset = 5 instructions
107 */
108 new = INSN_B_1F_5;
109#else
110 /*
111 * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
112 * addiu v1, v1, low_16bit_of_mcount
113 * move at, ra
114 * jalr v1
115 * nop | move $12, ra_address | sub sp, sp, 8
116 * 1: offset = 4 instructions
117 */
118 new = INSN_B_1F_4;
119#endif
120 } else {
121 /*
122 * move at, ra
123 * jal _mcount --> nop
124 */
125 new = INSN_NOP;
126 }
127 return ftrace_modify_code(ip, new); 133 return ftrace_modify_code(ip, new);
128} 134}
129 135
@@ -132,8 +138,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
132 unsigned int new; 138 unsigned int new;
133 unsigned long ip = rec->ip; 139 unsigned long ip = rec->ip;
134 140
135 /* ip, module: 0xc0000000, kernel: 0x80000000 */ 141 new = in_kernel_space(ip) ? insn_jal_ftrace_caller :
136 new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller; 142 insn_lui_v1_hi16_mcount;
137 143
138 return ftrace_modify_code(ip, new); 144 return ftrace_modify_code(ip, new);
139} 145}
@@ -190,29 +196,25 @@ int ftrace_disable_ftrace_graph_caller(void)
190#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */ 196#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */
191#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */ 197#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */
192 198
193unsigned long ftrace_get_parent_addr(unsigned long self_addr, 199unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
194 unsigned long parent, 200 old_parent_ra, unsigned long parent_ra_addr, unsigned long fp)
195 unsigned long parent_addr,
196 unsigned long fp)
197{ 201{
198 unsigned long sp, ip, ra; 202 unsigned long sp, ip, tmp;
199 unsigned int code; 203 unsigned int code;
200 int faulted; 204 int faulted;
201 205
202 /* 206 /*
203 * For module, move the ip from calling site of mcount to the 207 * For module, move the ip from the return address after the
204 * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for 208 * instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
205 * kernel, move to the instruction "move ra, at"(offset is 12) 209 * kernel, move after the instruction "move ra, at"(offset is 16)
206 */ 210 */
207 ip = self_addr - (in_module(self_addr) ? 20 : 12); 211 ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
208 212
209 /* 213 /*
210 * search the text until finding the non-store instruction or "s{d,w} 214 * search the text until finding the non-store instruction or "s{d,w}
211 * ra, offset(sp)" instruction 215 * ra, offset(sp)" instruction
212 */ 216 */
213 do { 217 do {
214 ip -= 4;
215
216 /* get the code at "ip": code = *(unsigned int *)ip; */ 218 /* get the code at "ip": code = *(unsigned int *)ip; */
217 safe_load_code(code, ip, faulted); 219 safe_load_code(code, ip, faulted);
218 220
@@ -224,18 +226,20 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
224 * store the ra on the stack 226 * store the ra on the stack
225 */ 227 */
226 if ((code & S_R_SP) != S_R_SP) 228 if ((code & S_R_SP) != S_R_SP)
227 return parent_addr; 229 return parent_ra_addr;
228 230
229 } while (((code & S_RA_SP) != S_RA_SP)); 231 /* Move to the next instruction */
232 ip -= 4;
233 } while ((code & S_RA_SP) != S_RA_SP);
230 234
231 sp = fp + (code & OFFSET_MASK); 235 sp = fp + (code & OFFSET_MASK);
232 236
233 /* ra = *(unsigned long *)sp; */ 237 /* tmp = *(unsigned long *)sp; */
234 safe_load_stack(ra, sp, faulted); 238 safe_load_stack(tmp, sp, faulted);
235 if (unlikely(faulted)) 239 if (unlikely(faulted))
236 return 0; 240 return 0;
237 241
238 if (ra == parent) 242 if (tmp == old_parent_ra)
239 return sp; 243 return sp;
240 return 0; 244 return 0;
241} 245}
@@ -246,21 +250,21 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
246 * Hook the return address and push it in the stack of return addrs 250 * Hook the return address and push it in the stack of return addrs
247 * in current thread info. 251 * in current thread info.
248 */ 252 */
249void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, 253void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
250 unsigned long fp) 254 unsigned long fp)
251{ 255{
252 unsigned long old; 256 unsigned long old_parent_ra;
253 struct ftrace_graph_ent trace; 257 struct ftrace_graph_ent trace;
254 unsigned long return_hooker = (unsigned long) 258 unsigned long return_hooker = (unsigned long)
255 &return_to_handler; 259 &return_to_handler;
256 int faulted; 260 int faulted, insns;
257 261
258 if (unlikely(atomic_read(&current->tracing_graph_pause))) 262 if (unlikely(atomic_read(&current->tracing_graph_pause)))
259 return; 263 return;
260 264
261 /* 265 /*
262 * "parent" is the stack address saved the return address of the caller 266 * "parent_ra_addr" is the stack address saved the return address of
263 * of _mcount. 267 * the caller of _mcount.
264 * 268 *
265 * if the gcc < 4.5, a leaf function does not save the return address 269 * if the gcc < 4.5, a leaf function does not save the return address
266 * in the stack address, so, we "emulate" one in _mcount's stack space, 270 * in the stack address, so, we "emulate" one in _mcount's stack space,
@@ -275,37 +279,44 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
275 * do it in ftrace_graph_caller of mcount.S. 279 * do it in ftrace_graph_caller of mcount.S.
276 */ 280 */
277 281
278 /* old = *parent; */ 282 /* old_parent_ra = *parent_ra_addr; */
279 safe_load_stack(old, parent, faulted); 283 safe_load_stack(old_parent_ra, parent_ra_addr, faulted);
280 if (unlikely(faulted)) 284 if (unlikely(faulted))
281 goto out; 285 goto out;
282#ifndef KBUILD_MCOUNT_RA_ADDRESS 286#ifndef KBUILD_MCOUNT_RA_ADDRESS
283 parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old, 287 parent_ra_addr = (unsigned long *)ftrace_get_parent_ra_addr(self_ra,
284 (unsigned long)parent, fp); 288 old_parent_ra, (unsigned long)parent_ra_addr, fp);
285 /* 289 /*
286 * If fails when getting the stack address of the non-leaf function's 290 * If fails when getting the stack address of the non-leaf function's
287 * ra, stop function graph tracer and return 291 * ra, stop function graph tracer and return
288 */ 292 */
289 if (parent == 0) 293 if (parent_ra_addr == 0)
290 goto out; 294 goto out;
291#endif 295#endif
292 /* *parent = return_hooker; */ 296 /* *parent_ra_addr = return_hooker; */
293 safe_store_stack(return_hooker, parent, faulted); 297 safe_store_stack(return_hooker, parent_ra_addr, faulted);
294 if (unlikely(faulted)) 298 if (unlikely(faulted))
295 goto out; 299 goto out;
296 300
297 if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) == 301 if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
298 -EBUSY) { 302 == -EBUSY) {
299 *parent = old; 303 *parent_ra_addr = old_parent_ra;
300 return; 304 return;
301 } 305 }
302 306
303 trace.func = self_addr; 307 /*
308 * Get the recorded ip of the current mcount calling site in the
309 * __mcount_loc section, which will be used to filter the function
310 * entries configured through the tracing/set_graph_function interface.
311 */
312
313 insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
314 trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
304 315
305 /* Only trace if the calling function expects to */ 316 /* Only trace if the calling function expects to */
306 if (!ftrace_graph_entry(&trace)) { 317 if (!ftrace_graph_entry(&trace)) {
307 current->curr_ret_stack--; 318 current->curr_ret_stack--;
308 *parent = old; 319 *parent_ra_addr = old_parent_ra;
309 } 320 }
310 return; 321 return;
311out: 322out:
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index 2b7f3f703b83..a8244854d3dc 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -161,41 +161,6 @@ mipspmu_event_set_period(struct perf_event *event,
161 return ret; 161 return ret;
162} 162}
163 163
164static int mipspmu_enable(struct perf_event *event)
165{
166 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
167 struct hw_perf_event *hwc = &event->hw;
168 int idx;
169 int err = 0;
170
171 /* To look for a free counter for this event. */
172 idx = mipspmu->alloc_counter(cpuc, hwc);
173 if (idx < 0) {
174 err = idx;
175 goto out;
176 }
177
178 /*
179 * If there is an event in the counter we are going to use then
180 * make sure it is disabled.
181 */
182 event->hw.idx = idx;
183 mipspmu->disable_event(idx);
184 cpuc->events[idx] = event;
185
186 /* Set the period for the event. */
187 mipspmu_event_set_period(event, hwc, idx);
188
189 /* Enable the event. */
190 mipspmu->enable_event(hwc, idx);
191
192 /* Propagate our changes to the userspace mapping. */
193 perf_event_update_userpage(event);
194
195out:
196 return err;
197}
198
199static void mipspmu_event_update(struct perf_event *event, 164static void mipspmu_event_update(struct perf_event *event,
200 struct hw_perf_event *hwc, 165 struct hw_perf_event *hwc,
201 int idx) 166 int idx)
@@ -204,7 +169,7 @@ static void mipspmu_event_update(struct perf_event *event,
204 unsigned long flags; 169 unsigned long flags;
205 int shift = 64 - TOTAL_BITS; 170 int shift = 64 - TOTAL_BITS;
206 s64 prev_raw_count, new_raw_count; 171 s64 prev_raw_count, new_raw_count;
207 s64 delta; 172 u64 delta;
208 173
209again: 174again:
210 prev_raw_count = local64_read(&hwc->prev_count); 175 prev_raw_count = local64_read(&hwc->prev_count);
@@ -231,32 +196,90 @@ again:
231 return; 196 return;
232} 197}
233 198
234static void mipspmu_disable(struct perf_event *event) 199static void mipspmu_start(struct perf_event *event, int flags)
200{
201 struct hw_perf_event *hwc = &event->hw;
202
203 if (!mipspmu)
204 return;
205
206 if (flags & PERF_EF_RELOAD)
207 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
208
209 hwc->state = 0;
210
211 /* Set the period for the event. */
212 mipspmu_event_set_period(event, hwc, hwc->idx);
213
214 /* Enable the event. */
215 mipspmu->enable_event(hwc, hwc->idx);
216}
217
218static void mipspmu_stop(struct perf_event *event, int flags)
219{
220 struct hw_perf_event *hwc = &event->hw;
221
222 if (!mipspmu)
223 return;
224
225 if (!(hwc->state & PERF_HES_STOPPED)) {
226 /* We are working on a local event. */
227 mipspmu->disable_event(hwc->idx);
228 barrier();
229 mipspmu_event_update(event, hwc, hwc->idx);
230 hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
231 }
232}
233
234static int mipspmu_add(struct perf_event *event, int flags)
235{ 235{
236 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 236 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
237 struct hw_perf_event *hwc = &event->hw; 237 struct hw_perf_event *hwc = &event->hw;
238 int idx = hwc->idx; 238 int idx;
239 int err = 0;
239 240
241 perf_pmu_disable(event->pmu);
240 242
241 WARN_ON(idx < 0 || idx >= mipspmu->num_counters); 243 /* To look for a free counter for this event. */
244 idx = mipspmu->alloc_counter(cpuc, hwc);
245 if (idx < 0) {
246 err = idx;
247 goto out;
248 }
242 249
243 /* We are working on a local event. */ 250 /*
251 * If there is an event in the counter we are going to use then
252 * make sure it is disabled.
253 */
254 event->hw.idx = idx;
244 mipspmu->disable_event(idx); 255 mipspmu->disable_event(idx);
256 cpuc->events[idx] = event;
245 257
246 barrier(); 258 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
247 259 if (flags & PERF_EF_START)
248 mipspmu_event_update(event, hwc, idx); 260 mipspmu_start(event, PERF_EF_RELOAD);
249 cpuc->events[idx] = NULL;
250 clear_bit(idx, cpuc->used_mask);
251 261
262 /* Propagate our changes to the userspace mapping. */
252 perf_event_update_userpage(event); 263 perf_event_update_userpage(event);
264
265out:
266 perf_pmu_enable(event->pmu);
267 return err;
253} 268}
254 269
255static void mipspmu_unthrottle(struct perf_event *event) 270static void mipspmu_del(struct perf_event *event, int flags)
256{ 271{
272 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
257 struct hw_perf_event *hwc = &event->hw; 273 struct hw_perf_event *hwc = &event->hw;
274 int idx = hwc->idx;
258 275
259 mipspmu->enable_event(hwc, hwc->idx); 276 WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
277
278 mipspmu_stop(event, PERF_EF_UPDATE);
279 cpuc->events[idx] = NULL;
280 clear_bit(idx, cpuc->used_mask);
281
282 perf_event_update_userpage(event);
260} 283}
261 284
262static void mipspmu_read(struct perf_event *event) 285static void mipspmu_read(struct perf_event *event)
@@ -270,12 +293,17 @@ static void mipspmu_read(struct perf_event *event)
270 mipspmu_event_update(event, hwc, hwc->idx); 293 mipspmu_event_update(event, hwc, hwc->idx);
271} 294}
272 295
273static struct pmu pmu = { 296static void mipspmu_enable(struct pmu *pmu)
274 .enable = mipspmu_enable, 297{
275 .disable = mipspmu_disable, 298 if (mipspmu)
276 .unthrottle = mipspmu_unthrottle, 299 mipspmu->start();
277 .read = mipspmu_read, 300}
278}; 301
302static void mipspmu_disable(struct pmu *pmu)
303{
304 if (mipspmu)
305 mipspmu->stop();
306}
279 307
280static atomic_t active_events = ATOMIC_INIT(0); 308static atomic_t active_events = ATOMIC_INIT(0);
281static DEFINE_MUTEX(pmu_reserve_mutex); 309static DEFINE_MUTEX(pmu_reserve_mutex);
@@ -318,6 +346,82 @@ static void mipspmu_free_irq(void)
318 perf_irq = save_perf_irq; 346 perf_irq = save_perf_irq;
319} 347}
320 348
349/*
350 * mipsxx/rm9000/loongson2 have different performance counters, they have
351 * specific low-level init routines.
352 */
353static void reset_counters(void *arg);
354static int __hw_perf_event_init(struct perf_event *event);
355
356static void hw_perf_event_destroy(struct perf_event *event)
357{
358 if (atomic_dec_and_mutex_lock(&active_events,
359 &pmu_reserve_mutex)) {
360 /*
361 * We must not call the destroy function with interrupts
362 * disabled.
363 */
364 on_each_cpu(reset_counters,
365 (void *)(long)mipspmu->num_counters, 1);
366 mipspmu_free_irq();
367 mutex_unlock(&pmu_reserve_mutex);
368 }
369}
370
371static int mipspmu_event_init(struct perf_event *event)
372{
373 int err = 0;
374
375 switch (event->attr.type) {
376 case PERF_TYPE_RAW:
377 case PERF_TYPE_HARDWARE:
378 case PERF_TYPE_HW_CACHE:
379 break;
380
381 default:
382 return -ENOENT;
383 }
384
385 if (!mipspmu || event->cpu >= nr_cpumask_bits ||
386 (event->cpu >= 0 && !cpu_online(event->cpu)))
387 return -ENODEV;
388
389 if (!atomic_inc_not_zero(&active_events)) {
390 if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
391 atomic_dec(&active_events);
392 return -ENOSPC;
393 }
394
395 mutex_lock(&pmu_reserve_mutex);
396 if (atomic_read(&active_events) == 0)
397 err = mipspmu_get_irq();
398
399 if (!err)
400 atomic_inc(&active_events);
401 mutex_unlock(&pmu_reserve_mutex);
402 }
403
404 if (err)
405 return err;
406
407 err = __hw_perf_event_init(event);
408 if (err)
409 hw_perf_event_destroy(event);
410
411 return err;
412}
413
414static struct pmu pmu = {
415 .pmu_enable = mipspmu_enable,
416 .pmu_disable = mipspmu_disable,
417 .event_init = mipspmu_event_init,
418 .add = mipspmu_add,
419 .del = mipspmu_del,
420 .start = mipspmu_start,
421 .stop = mipspmu_stop,
422 .read = mipspmu_read,
423};
424
321static inline unsigned int 425static inline unsigned int
322mipspmu_perf_event_encode(const struct mips_perf_event *pev) 426mipspmu_perf_event_encode(const struct mips_perf_event *pev)
323{ 427{
@@ -382,8 +486,9 @@ static int validate_event(struct cpu_hw_events *cpuc,
382{ 486{
383 struct hw_perf_event fake_hwc = event->hw; 487 struct hw_perf_event fake_hwc = event->hw;
384 488
385 if (event->pmu && event->pmu != &pmu) 489 /* Allow mixed event group. So return 1 to pass validation. */
386 return 0; 490 if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
491 return 1;
387 492
388 return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0; 493 return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0;
389} 494}
@@ -409,73 +514,6 @@ static int validate_group(struct perf_event *event)
409 return 0; 514 return 0;
410} 515}
411 516
412/*
413 * mipsxx/rm9000/loongson2 have different performance counters, they have
414 * specific low-level init routines.
415 */
416static void reset_counters(void *arg);
417static int __hw_perf_event_init(struct perf_event *event);
418
419static void hw_perf_event_destroy(struct perf_event *event)
420{
421 if (atomic_dec_and_mutex_lock(&active_events,
422 &pmu_reserve_mutex)) {
423 /*
424 * We must not call the destroy function with interrupts
425 * disabled.
426 */
427 on_each_cpu(reset_counters,
428 (void *)(long)mipspmu->num_counters, 1);
429 mipspmu_free_irq();
430 mutex_unlock(&pmu_reserve_mutex);
431 }
432}
433
434const struct pmu *hw_perf_event_init(struct perf_event *event)
435{
436 int err = 0;
437
438 if (!mipspmu || event->cpu >= nr_cpumask_bits ||
439 (event->cpu >= 0 && !cpu_online(event->cpu)))
440 return ERR_PTR(-ENODEV);
441
442 if (!atomic_inc_not_zero(&active_events)) {
443 if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
444 atomic_dec(&active_events);
445 return ERR_PTR(-ENOSPC);
446 }
447
448 mutex_lock(&pmu_reserve_mutex);
449 if (atomic_read(&active_events) == 0)
450 err = mipspmu_get_irq();
451
452 if (!err)
453 atomic_inc(&active_events);
454 mutex_unlock(&pmu_reserve_mutex);
455 }
456
457 if (err)
458 return ERR_PTR(err);
459
460 err = __hw_perf_event_init(event);
461 if (err)
462 hw_perf_event_destroy(event);
463
464 return err ? ERR_PTR(err) : &pmu;
465}
466
467void hw_perf_enable(void)
468{
469 if (mipspmu)
470 mipspmu->start();
471}
472
473void hw_perf_disable(void)
474{
475 if (mipspmu)
476 mipspmu->stop();
477}
478
479/* This is needed by specific irq handlers in perf_event_*.c */ 517/* This is needed by specific irq handlers in perf_event_*.c */
480static void 518static void
481handle_associated_event(struct cpu_hw_events *cpuc, 519handle_associated_event(struct cpu_hw_events *cpuc,
@@ -496,21 +534,13 @@ handle_associated_event(struct cpu_hw_events *cpuc,
496#include "perf_event_mipsxx.c" 534#include "perf_event_mipsxx.c"
497 535
498/* Callchain handling code. */ 536/* Callchain handling code. */
499static inline void
500callchain_store(struct perf_callchain_entry *entry,
501 u64 ip)
502{
503 if (entry->nr < PERF_MAX_STACK_DEPTH)
504 entry->ip[entry->nr++] = ip;
505}
506 537
507/* 538/*
508 * Leave userspace callchain empty for now. When we find a way to trace 539 * Leave userspace callchain empty for now. When we find a way to trace
509 * the user stack callchains, we add here. 540 * the user stack callchains, we add here.
510 */ 541 */
511static void 542void perf_callchain_user(struct perf_callchain_entry *entry,
512perf_callchain_user(struct pt_regs *regs, 543 struct pt_regs *regs)
513 struct perf_callchain_entry *entry)
514{ 544{
515} 545}
516 546
@@ -523,23 +553,21 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
523 while (!kstack_end(sp)) { 553 while (!kstack_end(sp)) {
524 addr = *sp++; 554 addr = *sp++;
525 if (__kernel_text_address(addr)) { 555 if (__kernel_text_address(addr)) {
526 callchain_store(entry, addr); 556 perf_callchain_store(entry, addr);
527 if (entry->nr >= PERF_MAX_STACK_DEPTH) 557 if (entry->nr >= PERF_MAX_STACK_DEPTH)
528 break; 558 break;
529 } 559 }
530 } 560 }
531} 561}
532 562
533static void 563void perf_callchain_kernel(struct perf_callchain_entry *entry,
534perf_callchain_kernel(struct pt_regs *regs, 564 struct pt_regs *regs)
535 struct perf_callchain_entry *entry)
536{ 565{
537 unsigned long sp = regs->regs[29]; 566 unsigned long sp = regs->regs[29];
538#ifdef CONFIG_KALLSYMS 567#ifdef CONFIG_KALLSYMS
539 unsigned long ra = regs->regs[31]; 568 unsigned long ra = regs->regs[31];
540 unsigned long pc = regs->cp0_epc; 569 unsigned long pc = regs->cp0_epc;
541 570
542 callchain_store(entry, PERF_CONTEXT_KERNEL);
543 if (raw_show_trace || !__kernel_text_address(pc)) { 571 if (raw_show_trace || !__kernel_text_address(pc)) {
544 unsigned long stack_page = 572 unsigned long stack_page =
545 (unsigned long)task_stack_page(current); 573 (unsigned long)task_stack_page(current);
@@ -549,53 +577,12 @@ perf_callchain_kernel(struct pt_regs *regs,
549 return; 577 return;
550 } 578 }
551 do { 579 do {
552 callchain_store(entry, pc); 580 perf_callchain_store(entry, pc);
553 if (entry->nr >= PERF_MAX_STACK_DEPTH) 581 if (entry->nr >= PERF_MAX_STACK_DEPTH)
554 break; 582 break;
555 pc = unwind_stack(current, &sp, pc, &ra); 583 pc = unwind_stack(current, &sp, pc, &ra);
556 } while (pc); 584 } while (pc);
557#else 585#else
558 callchain_store(entry, PERF_CONTEXT_KERNEL);
559 save_raw_perf_callchain(entry, sp); 586 save_raw_perf_callchain(entry, sp);
560#endif 587#endif
561} 588}
562
563static void
564perf_do_callchain(struct pt_regs *regs,
565 struct perf_callchain_entry *entry)
566{
567 int is_user;
568
569 if (!regs)
570 return;
571
572 is_user = user_mode(regs);
573
574 if (!current || !current->pid)
575 return;
576
577 if (is_user && current->state != TASK_RUNNING)
578 return;
579
580 if (!is_user) {
581 perf_callchain_kernel(regs, entry);
582 if (current->mm)
583 regs = task_pt_regs(current);
584 else
585 regs = NULL;
586 }
587 if (regs)
588 perf_callchain_user(regs, entry);
589}
590
591static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
592
593struct perf_callchain_entry *
594perf_callchain(struct pt_regs *regs)
595{
596 struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
597
598 entry->nr = 0;
599 perf_do_callchain(regs, entry);
600 return entry;
601}
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 183e0d226669..d9a7db78ed62 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -696,7 +696,7 @@ static int mipsxx_pmu_handle_shared_irq(void)
696 * interrupt, not NMI. 696 * interrupt, not NMI.
697 */ 697 */
698 if (handled == IRQ_HANDLED) 698 if (handled == IRQ_HANDLED)
699 perf_event_do_pending(); 699 irq_work_run();
700 700
701#ifdef CONFIG_MIPS_MT_SMP 701#ifdef CONFIG_MIPS_MT_SMP
702 read_unlock(&pmuint_rwlock); 702 read_unlock(&pmuint_rwlock);
@@ -1045,6 +1045,8 @@ init_hw_perf_events(void)
1045 "CPU, irq %d%s\n", mipspmu->name, counters, irq, 1045 "CPU, irq %d%s\n", mipspmu->name, counters, irq,
1046 irq < 0 ? " (share with timer interrupt)" : ""); 1046 irq < 0 ? " (share with timer interrupt)" : "");
1047 1047
1048 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1049
1048 return 0; 1050 return 0;
1049} 1051}
1050early_initcall(init_hw_perf_events); 1052early_initcall(init_hw_perf_events);
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 5922342bca39..dbbe0ce48d89 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -84,7 +84,7 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
84 84
85static int protected_restore_fp_context(struct sigcontext __user *sc) 85static int protected_restore_fp_context(struct sigcontext __user *sc)
86{ 86{
87 int err, tmp; 87 int err, tmp __maybe_unused;
88 while (1) { 88 while (1) {
89 lock_fpu_owner(); 89 lock_fpu_owner();
90 own_fpu_inatomic(0); 90 own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index a0ed0e052b2e..aae986613795 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -115,7 +115,7 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
115 115
116static int protected_restore_fp_context32(struct sigcontext32 __user *sc) 116static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
117{ 117{
118 int err, tmp; 118 int err, tmp __maybe_unused;
119 while (1) { 119 while (1) {
120 lock_fpu_owner(); 120 lock_fpu_owner();
121 own_fpu_inatomic(0); 121 own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 383aeb95cb49..32a256101082 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -193,6 +193,22 @@ void __devinit smp_prepare_boot_cpu(void)
193 */ 193 */
194static struct task_struct *cpu_idle_thread[NR_CPUS]; 194static struct task_struct *cpu_idle_thread[NR_CPUS];
195 195
196struct create_idle {
197 struct work_struct work;
198 struct task_struct *idle;
199 struct completion done;
200 int cpu;
201};
202
203static void __cpuinit do_fork_idle(struct work_struct *work)
204{
205 struct create_idle *c_idle =
206 container_of(work, struct create_idle, work);
207
208 c_idle->idle = fork_idle(c_idle->cpu);
209 complete(&c_idle->done);
210}
211
196int __cpuinit __cpu_up(unsigned int cpu) 212int __cpuinit __cpu_up(unsigned int cpu)
197{ 213{
198 struct task_struct *idle; 214 struct task_struct *idle;
@@ -203,8 +219,19 @@ int __cpuinit __cpu_up(unsigned int cpu)
203 * Linux can schedule processes on this slave. 219 * Linux can schedule processes on this slave.
204 */ 220 */
205 if (!cpu_idle_thread[cpu]) { 221 if (!cpu_idle_thread[cpu]) {
206 idle = fork_idle(cpu); 222 /*
207 cpu_idle_thread[cpu] = idle; 223 * Schedule work item to avoid forking user task
224 * Ported from arch/x86/kernel/smpboot.c
225 */
226 struct create_idle c_idle = {
227 .cpu = cpu,
228 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
229 };
230
231 INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
232 schedule_work(&c_idle.work);
233 wait_for_completion(&c_idle.done);
234 idle = cpu_idle_thread[cpu] = c_idle.idle;
208 235
209 if (IS_ERR(idle)) 236 if (IS_ERR(idle))
210 panic(KERN_ERR "Fork failed for CPU %d", cpu); 237 panic(KERN_ERR "Fork failed for CPU %d", cpu);
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 1dc6edff45e0..58beabf50b3c 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -383,12 +383,11 @@ save_static_function(sys_sysmips);
383static int __used noinline 383static int __used noinline
384_sys_sysmips(nabi_no_regargs struct pt_regs regs) 384_sys_sysmips(nabi_no_regargs struct pt_regs regs)
385{ 385{
386 long cmd, arg1, arg2, arg3; 386 long cmd, arg1, arg2;
387 387
388 cmd = regs.regs[4]; 388 cmd = regs.regs[4];
389 arg1 = regs.regs[5]; 389 arg1 = regs.regs[5];
390 arg2 = regs.regs[6]; 390 arg2 = regs.regs[6];
391 arg3 = regs.regs[7];
392 391
393 switch (cmd) { 392 switch (cmd) {
394 case MIPS_ATOMIC_SET: 393 case MIPS_ATOMIC_SET:
@@ -405,7 +404,7 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs)
405 if (arg1 & 2) 404 if (arg1 & 2)
406 set_thread_flag(TIF_LOGADE); 405 set_thread_flag(TIF_LOGADE);
407 else 406 else
408 clear_thread_flag(TIF_FIXADE); 407 clear_thread_flag(TIF_LOGADE);
409 408
410 return 0; 409 return 0;
411 410
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfef8fde..ab52b7cf3b6b 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
148 spinlock_t tc_list_lock; 148 spinlock_t tc_list_lock;
149 struct list_head tc_list; /* Thread contexts */ 149 struct list_head tc_list; /* Thread contexts */
150} vpecontrol = { 150} vpecontrol = {
151 .vpe_list_lock = SPIN_LOCK_UNLOCKED, 151 .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
152 .vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list), 152 .vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
153 .tc_list_lock = SPIN_LOCK_UNLOCKED, 153 .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
154 .tc_list = LIST_HEAD_INIT(vpecontrol.tc_list) 154 .tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
155}; 155};
156 156
diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig
index 6e1b77fec7ea..aca93eed8779 100644
--- a/arch/mips/loongson/Kconfig
+++ b/arch/mips/loongson/Kconfig
@@ -1,6 +1,7 @@
1if MACH_LOONGSON
2
1choice 3choice
2 prompt "Machine Type" 4 prompt "Machine Type"
3 depends on MACH_LOONGSON
4 5
5config LEMOTE_FULOONG2E 6config LEMOTE_FULOONG2E
6 bool "Lemote Fuloong(2e) mini-PC" 7 bool "Lemote Fuloong(2e) mini-PC"
@@ -87,3 +88,5 @@ config LOONGSON_UART_BASE
87config LOONGSON_MC146818 88config LOONGSON_MC146818
88 bool 89 bool
89 default n 90 default n
91
92endif # MACH_LOONGSON
diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c
index 1a06defc4f7f..353e1d2e41a5 100644
--- a/arch/mips/loongson/common/cmdline.c
+++ b/arch/mips/loongson/common/cmdline.c
@@ -44,10 +44,5 @@ void __init prom_init_cmdline(void)
44 strcat(arcs_cmdline, " "); 44 strcat(arcs_cmdline, " ");
45 } 45 }
46 46
47 if ((strstr(arcs_cmdline, "console=")) == NULL)
48 strcat(arcs_cmdline, " console=ttyS0,115200");
49 if ((strstr(arcs_cmdline, "root=")) == NULL)
50 strcat(arcs_cmdline, " root=/dev/hda1");
51
52 prom_init_machtype(); 47 prom_init_machtype();
53} 48}
diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c
index 81fbe6b73f91..2efd5d9dee27 100644
--- a/arch/mips/loongson/common/machtype.c
+++ b/arch/mips/loongson/common/machtype.c
@@ -41,7 +41,7 @@ void __weak __init mach_prom_init_machtype(void)
41 41
42void __init prom_init_machtype(void) 42void __init prom_init_machtype(void)
43{ 43{
44 char *p, str[MACHTYPE_LEN]; 44 char *p, str[MACHTYPE_LEN + 1];
45 int machtype = MACH_LEMOTE_FL2E; 45 int machtype = MACH_LEMOTE_FL2E;
46 46
47 mips_machtype = LOONGSON_MACHTYPE; 47 mips_machtype = LOONGSON_MACHTYPE;
@@ -53,6 +53,7 @@ void __init prom_init_machtype(void)
53 } 53 }
54 p += strlen("machtype="); 54 p += strlen("machtype=");
55 strncpy(str, p, MACHTYPE_LEN); 55 strncpy(str, p, MACHTYPE_LEN);
56 str[MACHTYPE_LEN] = '\0';
56 p = strstr(str, " "); 57 p = strstr(str, " ");
57 if (p) 58 if (p)
58 *p = '\0'; 59 *p = '\0';
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 2701d9500959..2a7d43f4f161 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -70,7 +70,7 @@
70 70
71 71
72#define COMPXSP \ 72#define COMPXSP \
73 unsigned xm; int xe; int xs; int xc 73 unsigned xm; int xe; int xs __maybe_unused; int xc
74 74
75#define COMPYSP \ 75#define COMPYSP \
76 unsigned ym; int ye; int ys; int yc 76 unsigned ym; int ye; int ys; int yc
@@ -104,7 +104,7 @@
104 104
105 105
106#define COMPXDP \ 106#define COMPXDP \
107u64 xm; int xe; int xs; int xc 107u64 xm; int xe; int xs __maybe_unused; int xc
108 108
109#define COMPYDP \ 109#define COMPYDP \
110u64 ym; int ye; int ys; int yc 110u64 ym; int ye; int ys; int yc
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 2efcbd24c82f..279599e9a779 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -324,7 +324,7 @@ int page_is_ram(unsigned long pagenr)
324void __init paging_init(void) 324void __init paging_init(void)
325{ 325{
326 unsigned long max_zone_pfns[MAX_NR_ZONES]; 326 unsigned long max_zone_pfns[MAX_NR_ZONES];
327 unsigned long lastpfn; 327 unsigned long lastpfn __maybe_unused;
328 328
329 pagetable_init(); 329 pagetable_init();
330 330
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 083d3412d0bc..04f9e17db9d0 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -109,6 +109,8 @@ static bool scratchpad_available(void)
109static int scratchpad_offset(int i) 109static int scratchpad_offset(int i)
110{ 110{
111 BUG(); 111 BUG();
112 /* Really unreachable, but evidently some GCC want this. */
113 return 0;
112} 114}
113#endif 115#endif
114/* 116/*
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index b7c03d80c88c..68798f869c0f 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c
@@ -308,7 +308,7 @@ static struct resource pci_mem_resource = {
308 * RETURNS: PCIBIOS_SUCCESSFUL - success 308 * RETURNS: PCIBIOS_SUCCESSFUL - success
309 * 309 *
310 ****************************************************************************/ 310 ****************************************************************************/
311static int bpci_interrupt(int irq, void *dev_id) 311static irqreturn_t bpci_interrupt(int irq, void *dev_id)
312{ 312{
313 struct msp_pci_regs *preg = (void *)PCI_BASE_REG; 313 struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
314 unsigned int stat = preg->if_status; 314 unsigned int stat = preg->if_status;
@@ -326,7 +326,7 @@ static int bpci_interrupt(int irq, void *dev_id)
326 /* write to clear all asserted interrupts */ 326 /* write to clear all asserted interrupts */
327 preg->if_status = stat; 327 preg->if_status = stat;
328 328
329 return PCIBIOS_SUCCESSFUL; 329 return IRQ_HANDLED;
330} 330}
331 331
332/***************************************************************************** 332/*****************************************************************************
diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig
index c139988bb85d..8d798497c614 100644
--- a/arch/mips/pmc-sierra/Kconfig
+++ b/arch/mips/pmc-sierra/Kconfig
@@ -4,15 +4,11 @@ choice
4 4
5config PMC_MSP4200_EVAL 5config PMC_MSP4200_EVAL
6 bool "PMC-Sierra MSP4200 Eval Board" 6 bool "PMC-Sierra MSP4200 Eval Board"
7 select CEVT_R4K
8 select CSRC_R4K
9 select IRQ_MSP_SLP 7 select IRQ_MSP_SLP
10 select HW_HAS_PCI 8 select HW_HAS_PCI
11 9
12config PMC_MSP4200_GW 10config PMC_MSP4200_GW
13 bool "PMC-Sierra MSP4200 VoIP Gateway" 11 bool "PMC-Sierra MSP4200 VoIP Gateway"
14 select CEVT_R4K
15 select CSRC_R4K
16 select IRQ_MSP_SLP 12 select IRQ_MSP_SLP
17 select HW_HAS_PCI 13 select HW_HAS_PCI
18 14
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index cca64e15f57f..01df84ce31e2 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c
@@ -81,7 +81,7 @@ void __init plat_time_init(void)
81 mips_hpt_frequency = cpu_rate/2; 81 mips_hpt_frequency = cpu_rate/2;
82} 82}
83 83
84unsigned int __init get_c0_compare_int(void) 84unsigned int __cpuinit get_c0_compare_int(void)
85{ 85{
86 return MSP_INT_VPE0_TIMER; 86 return MSP_INT_VPE0_TIMER;
87} 87}
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 92d2f9298e38..9d773a639513 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -139,7 +139,7 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
139 * Atomically reads the value of @v. Note that the guaranteed 139 * Atomically reads the value of @v. Note that the guaranteed
140 * useful range of an atomic_t is only 24 bits. 140 * useful range of an atomic_t is only 24 bits.
141 */ 141 */
142#define atomic_read(v) ((v)->counter) 142#define atomic_read(v) (ACCESS_ONCE((v)->counter))
143 143
144/** 144/**
145 * atomic_set - set atomic variable 145 * atomic_set - set atomic variable
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index 679dee0bbd08..3d6e60dad9d9 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -160,9 +160,10 @@ struct __large_struct { unsigned long buf[100]; };
160 160
161#define __get_user_check(x, ptr, size) \ 161#define __get_user_check(x, ptr, size) \
162({ \ 162({ \
163 const __typeof__(ptr) __guc_ptr = (ptr); \
163 int _e; \ 164 int _e; \
164 if (likely(__access_ok((unsigned long) (ptr), (size)))) \ 165 if (likely(__access_ok((unsigned long) __guc_ptr, (size)))) \
165 _e = __get_user_nocheck((x), (ptr), (size)); \ 166 _e = __get_user_nocheck((x), __guc_ptr, (size)); \
166 else { \ 167 else { \
167 _e = -EFAULT; \ 168 _e = -EFAULT; \
168 (x) = (__typeof__(x))0; \ 169 (x) = (__typeof__(x))0; \
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index 75da468090b9..5b955000626d 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -104,8 +104,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
104 unsigned tsc, elapse; 104 unsigned tsc, elapse;
105 irqreturn_t ret; 105 irqreturn_t ret;
106 106
107 write_seqlock(&xtime_lock);
108
109 while (tsc = get_cycles(), 107 while (tsc = get_cycles(),
110 elapse = tsc - mn10300_last_tsc, /* time elapsed since last 108 elapse = tsc - mn10300_last_tsc, /* time elapsed since last
111 * tick */ 109 * tick */
@@ -114,11 +112,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
114 mn10300_last_tsc += MN10300_TSC_PER_HZ; 112 mn10300_last_tsc += MN10300_TSC_PER_HZ;
115 113
116 /* advance the kernel's time tracking system */ 114 /* advance the kernel's time tracking system */
117 do_timer(1); 115 xtime_update(1);
118 } 116 }
119 117
120 write_sequnlock(&xtime_lock);
121
122 ret = local_timer_interrupt(); 118 ret = local_timer_interrupt();
123#ifdef CONFIG_SMP 119#ifdef CONFIG_SMP
124 send_IPI_allbutself(LOCAL_TIMER_IPI); 120 send_IPI_allbutself(LOCAL_TIMER_IPI);
diff --git a/arch/mn10300/mm/cache-inv-icache.c b/arch/mn10300/mm/cache-inv-icache.c
index a8933a60b2d4..a6b63dde603d 100644
--- a/arch/mn10300/mm/cache-inv-icache.c
+++ b/arch/mn10300/mm/cache-inv-icache.c
@@ -69,7 +69,7 @@ static void flush_icache_page_range(unsigned long start, unsigned long end)
69 69
70 /* invalidate the icache coverage on that region */ 70 /* invalidate the icache coverage on that region */
71 mn10300_local_icache_inv_range2(addr + off, size); 71 mn10300_local_icache_inv_range2(addr + off, size);
72 smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); 72 smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
73} 73}
74 74
75/** 75/**
@@ -101,7 +101,7 @@ void flush_icache_range(unsigned long start, unsigned long end)
101 * directly */ 101 * directly */
102 start_page = (start >= 0x80000000UL) ? start : 0x80000000UL; 102 start_page = (start >= 0x80000000UL) ? start : 0x80000000UL;
103 mn10300_icache_inv_range(start_page, end); 103 mn10300_icache_inv_range(start_page, end);
104 smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end); 104 smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
105 if (start_page == start) 105 if (start_page == start)
106 goto done; 106 goto done;
107 end = start_page; 107 end = start_page;
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 30394081d9b6..6ab9580b0b00 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -185,26 +185,21 @@ struct hpux_statfs {
185 int16_t f_pad; 185 int16_t f_pad;
186}; 186};
187 187
188static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf) 188static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p)
189{ 189{
190 struct kstatfs st; 190 struct hpux_statfs buf;
191 int retval; 191 memset(&buf, 0, sizeof(buf));
192 192 buf.f_type = st->f_type;
193 retval = vfs_statfs(path, &st); 193 buf.f_bsize = st->f_bsize;
194 if (retval) 194 buf.f_blocks = st->f_blocks;
195 return retval; 195 buf.f_bfree = st->f_bfree;
196 196 buf.f_bavail = st->f_bavail;
197 memset(buf, 0, sizeof(*buf)); 197 buf.f_files = st->f_files;
198 buf->f_type = st.f_type; 198 buf.f_ffree = st->f_ffree;
199 buf->f_bsize = st.f_bsize; 199 buf.f_fsid[0] = st->f_fsid.val[0];
200 buf->f_blocks = st.f_blocks; 200 buf.f_fsid[1] = st->f_fsid.val[1];
201 buf->f_bfree = st.f_bfree; 201 if (copy_to_user(p, &buf, sizeof(buf)))
202 buf->f_bavail = st.f_bavail; 202 return -EFAULT;
203 buf->f_files = st.f_files;
204 buf->f_ffree = st.f_ffree;
205 buf->f_fsid[0] = st.f_fsid.val[0];
206 buf->f_fsid[1] = st.f_fsid.val[1];
207
208 return 0; 203 return 0;
209} 204}
210 205
@@ -212,35 +207,19 @@ static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
212asmlinkage long hpux_statfs(const char __user *pathname, 207asmlinkage long hpux_statfs(const char __user *pathname,
213 struct hpux_statfs __user *buf) 208 struct hpux_statfs __user *buf)
214{ 209{
215 struct path path; 210 struct kstatfs st;
216 int error; 211 int error = user_statfs(pathname, &st);
217 212 if (!error)
218 error = user_path(pathname, &path); 213 error = do_statfs_hpux(&st, buf);
219 if (!error) {
220 struct hpux_statfs tmp;
221 error = do_statfs_hpux(&path, &tmp);
222 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
223 error = -EFAULT;
224 path_put(&path);
225 }
226 return error; 214 return error;
227} 215}
228 216
229asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf) 217asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf)
230{ 218{
231 struct file *file; 219 struct kstatfs st;
232 struct hpux_statfs tmp; 220 int error = fd_statfs(fd, &st);
233 int error; 221 if (!error)
234 222 error = do_statfs_hpux(&st, buf);
235 error = -EBADF;
236 file = fget(fd);
237 if (!file)
238 goto out;
239 error = do_statfs_hpux(&file->f_path, &tmp);
240 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
241 error = -EFAULT;
242 fput(file);
243 out:
244 return error; 223 return error;
245} 224}
246 225
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 0c705c3a55ef..67a33cc27ef2 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -8,7 +8,7 @@
8#include <asm/errno.h> 8#include <asm/errno.h>
9 9
10static inline int 10static inline int
11futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 11futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
12{ 12{
13 int op = (encoded_op >> 28) & 7; 13 int op = (encoded_op >> 28) & 7;
14 int cmp = (encoded_op >> 24) & 15; 14 int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg; 19 oparg = 1 << oparg;
20 20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
22 return -EFAULT; 22 return -EFAULT;
23 23
24 pagefault_disable(); 24 pagefault_disable();
@@ -51,10 +51,10 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
51 51
52/* Non-atomic version */ 52/* Non-atomic version */
53static inline int 53static inline int
54futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 54futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
55 u32 oldval, u32 newval)
55{ 56{
56 int err = 0; 57 u32 val;
57 int uval;
58 58
59 /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is 59 /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
60 * our gateway page, and causes no end of trouble... 60 * our gateway page, and causes no end of trouble...
@@ -62,15 +62,15 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
62 if (segment_eq(KERNEL_DS, get_fs()) && !uaddr) 62 if (segment_eq(KERNEL_DS, get_fs()) && !uaddr)
63 return -EFAULT; 63 return -EFAULT;
64 64
65 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 65 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
66 return -EFAULT; 66 return -EFAULT;
67 67
68 err = get_user(uval, uaddr); 68 if (get_user(val, uaddr))
69 if (err) return -EFAULT; 69 return -EFAULT;
70 if (uval == oldval) 70 if (val == oldval && put_user(newval, uaddr))
71 err = put_user(newval, uaddr); 71 return -EFAULT;
72 if (err) return -EFAULT; 72 *uval = val;
73 return uval; 73 return 0;
74} 74}
75 75
76#endif /*__KERNEL__*/ 76#endif /*__KERNEL__*/
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 05511ccb61d2..45b7389d77aa 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -162,11 +162,8 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
162 update_process_times(user_mode(get_irq_regs())); 162 update_process_times(user_mode(get_irq_regs()));
163 } 163 }
164 164
165 if (cpu == 0) { 165 if (cpu == 0)
166 write_seqlock(&xtime_lock); 166 xtime_update(ticks_elapsed);
167 do_timer(ticks_elapsed);
168 write_sequnlock(&xtime_lock);
169 }
170 167
171 return IRQ_HANDLED; 168 return IRQ_HANDLED;
172} 169}
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 7c589ef81fb0..c94e4a3fe2ef 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -30,7 +30,7 @@
30 : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ 30 : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
31 : "cr0", "memory") 31 : "cr0", "memory")
32 32
33static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 33static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
34{ 34{
35 int op = (encoded_op >> 28) & 7; 35 int op = (encoded_op >> 28) & 7;
36 int cmp = (encoded_op >> 24) & 15; 36 int cmp = (encoded_op >> 24) & 15;
@@ -40,7 +40,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
40 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 40 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
41 oparg = 1 << oparg; 41 oparg = 1 << oparg;
42 42
43 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 43 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
44 return -EFAULT; 44 return -EFAULT;
45 45
46 pagefault_disable(); 46 pagefault_disable();
@@ -82,35 +82,38 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
82} 82}
83 83
84static inline int 84static inline int
85futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 85futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
86 u32 oldval, u32 newval)
86{ 87{
87 int prev; 88 int ret = 0;
89 u32 prev;
88 90
89 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 91 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
90 return -EFAULT; 92 return -EFAULT;
91 93
92 __asm__ __volatile__ ( 94 __asm__ __volatile__ (
93 PPC_RELEASE_BARRIER 95 PPC_RELEASE_BARRIER
94"1: lwarx %0,0,%2 # futex_atomic_cmpxchg_inatomic\n\ 96"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
95 cmpw 0,%0,%3\n\ 97 cmpw 0,%1,%4\n\
96 bne- 3f\n" 98 bne- 3f\n"
97 PPC405_ERR77(0,%2) 99 PPC405_ERR77(0,%3)
98"2: stwcx. %4,0,%2\n\ 100"2: stwcx. %5,0,%3\n\
99 bne- 1b\n" 101 bne- 1b\n"
100 PPC_ACQUIRE_BARRIER 102 PPC_ACQUIRE_BARRIER
101"3: .section .fixup,\"ax\"\n\ 103"3: .section .fixup,\"ax\"\n\
1024: li %0,%5\n\ 1044: li %0,%6\n\
103 b 3b\n\ 105 b 3b\n\
104 .previous\n\ 106 .previous\n\
105 .section __ex_table,\"a\"\n\ 107 .section __ex_table,\"a\"\n\
106 .align 3\n\ 108 .align 3\n\
107 " PPC_LONG "1b,4b,2b,4b\n\ 109 " PPC_LONG "1b,4b,2b,4b\n\
108 .previous" \ 110 .previous" \
109 : "=&r" (prev), "+m" (*uaddr) 111 : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
110 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) 112 : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
111 : "cc", "memory"); 113 : "cc", "memory");
112 114
113 return prev; 115 *uval = prev;
116 return ret;
114} 117}
115 118
116#endif /* __KERNEL__ */ 119#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 380d48bacd16..26b8c807f8f1 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -33,9 +33,25 @@
33// 33//
34//---------------------------------------------------------------------------- 34//----------------------------------------------------------------------------
35#include <linux/cache.h> 35#include <linux/cache.h>
36#include <linux/threads.h>
36#include <asm/types.h> 37#include <asm/types.h>
37#include <asm/mmu.h> 38#include <asm/mmu.h>
38 39
40/*
41 * We only have to have statically allocated lppaca structs on
42 * legacy iSeries, which supports at most 64 cpus.
43 */
44#ifdef CONFIG_PPC_ISERIES
45#if NR_CPUS < 64
46#define NR_LPPACAS NR_CPUS
47#else
48#define NR_LPPACAS 64
49#endif
50#else /* not iSeries */
51#define NR_LPPACAS 1
52#endif
53
54
39/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k 55/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
40 * alignment is sufficient to prevent this */ 56 * alignment is sufficient to prevent this */
41struct lppaca { 57struct lppaca {
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 991d5998d6be..fe56a23e1ff0 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -240,6 +240,12 @@ struct machdep_calls {
240 * claims to support kexec. 240 * claims to support kexec.
241 */ 241 */
242 int (*machine_kexec_prepare)(struct kimage *image); 242 int (*machine_kexec_prepare)(struct kimage *image);
243
244 /* Called to perform the _real_ kexec.
245 * Do NOT allocate memory or fail here. We are past the point of
246 * no return.
247 */
248 void (*machine_kexec)(struct kimage *image);
243#endif /* CONFIG_KEXEC */ 249#endif /* CONFIG_KEXEC */
244 250
245#ifdef CONFIG_SUSPEND 251#ifdef CONFIG_SUSPEND
diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h
index 8447d89fbe72..bb1e2cdeb9bf 100644
--- a/arch/powerpc/include/asm/rwsem.h
+++ b/arch/powerpc/include/asm/rwsem.h
@@ -13,11 +13,6 @@
13 * by Paul Mackerras <paulus@samba.org>. 13 * by Paul Mackerras <paulus@samba.org>.
14 */ 14 */
15 15
16#include <linux/list.h>
17#include <linux/spinlock.h>
18#include <asm/atomic.h>
19#include <asm/system.h>
20
21/* 16/*
22 * the semaphore definition 17 * the semaphore definition
23 */ 18 */
@@ -33,47 +28,6 @@
33#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 28#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
34#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 29#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
35 30
36struct rw_semaphore {
37 long count;
38 spinlock_t wait_lock;
39 struct list_head wait_list;
40#ifdef CONFIG_DEBUG_LOCK_ALLOC
41 struct lockdep_map dep_map;
42#endif
43};
44
45#ifdef CONFIG_DEBUG_LOCK_ALLOC
46# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
47#else
48# define __RWSEM_DEP_MAP_INIT(lockname)
49#endif
50
51#define __RWSEM_INITIALIZER(name) \
52{ \
53 RWSEM_UNLOCKED_VALUE, \
54 __SPIN_LOCK_UNLOCKED((name).wait_lock), \
55 LIST_HEAD_INIT((name).wait_list) \
56 __RWSEM_DEP_MAP_INIT(name) \
57}
58
59#define DECLARE_RWSEM(name) \
60 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
61
62extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
63extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
64extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
65extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
66
67extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
68 struct lock_class_key *key);
69
70#define init_rwsem(sem) \
71 do { \
72 static struct lock_class_key __key; \
73 \
74 __init_rwsem((sem), #sem, &__key); \
75 } while (0)
76
77/* 31/*
78 * lock for reading 32 * lock for reading
79 */ 33 */
@@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
174 return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); 128 return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
175} 129}
176 130
177static inline int rwsem_is_locked(struct rw_semaphore *sem)
178{
179 return sem->count != 0;
180}
181
182#endif /* __KERNEL__ */ 131#endif /* __KERNEL__ */
183#endif /* _ASM_POWERPC_RWSEM_H */ 132#endif /* _ASM_POWERPC_RWSEM_H */
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 49a170af8145..a5f8672eeff3 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -87,7 +87,10 @@ void machine_kexec(struct kimage *image)
87 87
88 save_ftrace_enabled = __ftrace_enabled_save(); 88 save_ftrace_enabled = __ftrace_enabled_save();
89 89
90 default_machine_kexec(image); 90 if (ppc_md.machine_kexec)
91 ppc_md.machine_kexec(image);
92 else
93 default_machine_kexec(image);
91 94
92 __ftrace_enabled_restore(save_ftrace_enabled); 95 __ftrace_enabled_restore(save_ftrace_enabled);
93 96
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ebf9846f3c3b..f4adf89d7614 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -27,20 +27,6 @@ extern unsigned long __toc_start;
27#ifdef CONFIG_PPC_BOOK3S 27#ifdef CONFIG_PPC_BOOK3S
28 28
29/* 29/*
30 * We only have to have statically allocated lppaca structs on
31 * legacy iSeries, which supports at most 64 cpus.
32 */
33#ifdef CONFIG_PPC_ISERIES
34#if NR_CPUS < 64
35#define NR_LPPACAS NR_CPUS
36#else
37#define NR_LPPACAS 64
38#endif
39#else /* not iSeries */
40#define NR_LPPACAS 1
41#endif
42
43/*
44 * The structure which the hypervisor knows about - this structure 30 * The structure which the hypervisor knows about - this structure
45 * should not cross a page boundary. The vpa_init/register_vpa call 31 * should not cross a page boundary. The vpa_init/register_vpa call
46 * is now known to fail if the lppaca structure crosses a page 32 * is now known to fail if the lppaca structure crosses a page
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7a1d5cb76932..8303a6c65ef7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -353,6 +353,7 @@ static void switch_booke_debug_regs(struct thread_struct *new_thread)
353 prime_debug_regs(new_thread); 353 prime_debug_regs(new_thread);
354} 354}
355#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ 355#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
356#ifndef CONFIG_HAVE_HW_BREAKPOINT
356static void set_debug_reg_defaults(struct thread_struct *thread) 357static void set_debug_reg_defaults(struct thread_struct *thread)
357{ 358{
358 if (thread->dabr) { 359 if (thread->dabr) {
@@ -360,6 +361,7 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
360 set_dabr(0); 361 set_dabr(0);
361 } 362 }
362} 363}
364#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
363#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ 365#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
364 366
365int set_dabr(unsigned long dabr) 367int set_dabr(unsigned long dabr)
@@ -670,11 +672,11 @@ void flush_thread(void)
670{ 672{
671 discard_lazy_cpu_state(); 673 discard_lazy_cpu_state();
672 674
673#ifdef CONFIG_HAVE_HW_BREAKPOINTS 675#ifdef CONFIG_HAVE_HW_BREAKPOINT
674 flush_ptrace_hw_breakpoint(current); 676 flush_ptrace_hw_breakpoint(current);
675#else /* CONFIG_HAVE_HW_BREAKPOINTS */ 677#else /* CONFIG_HAVE_HW_BREAKPOINT */
676 set_debug_reg_defaults(&current->thread); 678 set_debug_reg_defaults(&current->thread);
677#endif /* CONFIG_HAVE_HW_BREAKPOINTS */ 679#endif /* CONFIG_HAVE_HW_BREAKPOINT */
678} 680}
679 681
680void 682void
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fd4812329570..0dc95c0aa3be 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1516,7 +1516,8 @@ int start_topology_update(void)
1516{ 1516{
1517 int rc = 0; 1517 int rc = 0;
1518 1518
1519 if (firmware_has_feature(FW_FEATURE_VPHN) && 1519 /* Disabled until races with load balancing are fixed */
1520 if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
1520 get_lppaca()->shared_proc) { 1521 get_lppaca()->shared_proc) {
1521 vphn_enabled = 1; 1522 vphn_enabled = 1;
1522 setup_cpu_associativity_change_counters(); 1523 setup_cpu_associativity_change_counters();
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 1ec06576f619..c14d09f614f3 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -38,13 +38,11 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
38 * neesd to be flushed. This function will either perform the flush 38 * neesd to be flushed. This function will either perform the flush
39 * immediately or will batch it up if the current CPU has an active 39 * immediately or will batch it up if the current CPU has an active
40 * batch on it. 40 * batch on it.
41 *
42 * Must be called from within some kind of spinlock/non-preempt region...
43 */ 41 */
44void hpte_need_flush(struct mm_struct *mm, unsigned long addr, 42void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
45 pte_t *ptep, unsigned long pte, int huge) 43 pte_t *ptep, unsigned long pte, int huge)
46{ 44{
47 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 45 struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
48 unsigned long vsid, vaddr; 46 unsigned long vsid, vaddr;
49 unsigned int psize; 47 unsigned int psize;
50 int ssize; 48 int ssize;
@@ -99,6 +97,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
99 */ 97 */
100 if (!batch->active) { 98 if (!batch->active) {
101 flush_hash_page(vaddr, rpte, psize, ssize, 0); 99 flush_hash_page(vaddr, rpte, psize, ssize, 0);
100 put_cpu_var(ppc64_tlb_batch);
102 return; 101 return;
103 } 102 }
104 103
@@ -127,6 +126,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
127 batch->index = ++i; 126 batch->index = ++i;
128 if (i >= PPC64_TLB_BATCH_NR) 127 if (i >= PPC64_TLB_BATCH_NR)
129 __flush_tlb_pending(batch); 128 __flush_tlb_pending(batch);
129 put_cpu_var(ppc64_tlb_batch);
130} 130}
131 131
132/* 132/*
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 187a7d32f86a..a3d2ce54ea2e 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
70 if (!IS_ERR(tmp)) { 70 if (!IS_ERR(tmp)) {
71 struct nameidata nd; 71 struct nameidata nd;
72 72
73 ret = path_lookup(tmp, LOOKUP_PARENT, &nd); 73 ret = kern_path_parent(tmp, &nd);
74 if (!ret) { 74 if (!ret) {
75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE; 75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
76 ret = spufs_create(&nd, flags, mode, neighbor); 76 ret = spufs_create(&nd, flags, mode, neighbor);
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index fdb7384c0c4f..f0491cc28900 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -242,8 +242,8 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
242 pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */ 242 pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */
243 pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE); 243 pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE);
244 244
245 for (i = 0; i < NR_CPUS; i++) { 245 for (i = 0; i < NR_LPPACAS; i++) {
246 if (lppaca_of(i).dyn_proc_status >= 2) 246 if (lppaca[i].dyn_proc_status >= 2)
247 continue; 247 continue;
248 248
249 snprintf(p, 32 - (p - buf), "@%d", i); 249 snprintf(p, 32 - (p - buf), "@%d", i);
@@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
251 251
252 dt_prop_str(dt, "device_type", device_type_cpu); 252 dt_prop_str(dt, "device_type", device_type_cpu);
253 253
254 index = lppaca_of(i).dyn_hv_phys_proc_index; 254 index = lppaca[i].dyn_hv_phys_proc_index;
255 d = &xIoHriProcessorVpd[index]; 255 d = &xIoHriProcessorVpd[index];
256 256
257 dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); 257 dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b0863410517f..2946ae10fbfd 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -680,6 +680,7 @@ void * __init iSeries_early_setup(void)
680 * on but calling this function multiple times is fine. 680 * on but calling this function multiple times is fine.
681 */ 681 */
682 identify_cpu(0, mfspr(SPRN_PVR)); 682 identify_cpu(0, mfspr(SPRN_PVR));
683 initialise_paca(&boot_paca, 0);
683 684
684 powerpc_firmware_features |= FW_FEATURE_ISERIES; 685 powerpc_firmware_features |= FW_FEATURE_ISERIES;
685 powerpc_firmware_features |= FW_FEATURE_LPAR; 686 powerpc_firmware_features |= FW_FEATURE_LPAR;
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5c5d02de49e9..81cf36b691f1 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -7,7 +7,7 @@
7#include <linux/uaccess.h> 7#include <linux/uaccess.h>
8#include <asm/errno.h> 8#include <asm/errno.h>
9 9
10static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 10static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
11{ 11{
12 int op = (encoded_op >> 28) & 7; 12 int op = (encoded_op >> 28) & 7;
13 int cmp = (encoded_op >> 24) & 15; 13 int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 18 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
19 oparg = 1 << oparg; 19 oparg = 1 << oparg;
20 20
21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 21 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
22 return -EFAULT; 22 return -EFAULT;
23 23
24 pagefault_disable(); 24 pagefault_disable();
@@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
39 return ret; 39 return ret;
40} 40}
41 41
42static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, 42static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
43 int oldval, int newval) 43 u32 oldval, u32 newval)
44{ 44{
45 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 45 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
46 return -EFAULT; 46 return -EFAULT;
47 47
48 return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval); 48 return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval);
49} 49}
50 50
51#endif /* __KERNEL__ */ 51#endif /* __KERNEL__ */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322d..d0eb4653cebd 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
43 43
44#ifdef __KERNEL__ 44#ifdef __KERNEL__
45 45
46#include <linux/list.h>
47#include <linux/spinlock.h>
48
49struct rwsem_waiter;
50
51extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
52extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
53extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
54extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
55extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
56
57/*
58 * the semaphore definition
59 */
60struct rw_semaphore {
61 signed long count;
62 spinlock_t wait_lock;
63 struct list_head wait_list;
64#ifdef CONFIG_DEBUG_LOCK_ALLOC
65 struct lockdep_map dep_map;
66#endif
67};
68
69#ifndef __s390x__ 46#ifndef __s390x__
70#define RWSEM_UNLOCKED_VALUE 0x00000000 47#define RWSEM_UNLOCKED_VALUE 0x00000000
71#define RWSEM_ACTIVE_BIAS 0x00000001 48#define RWSEM_ACTIVE_BIAS 0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
81#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 58#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
82 59
83/* 60/*
84 * initialisation
85 */
86
87#ifdef CONFIG_DEBUG_LOCK_ALLOC
88# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
89#else
90# define __RWSEM_DEP_MAP_INIT(lockname)
91#endif
92
93#define __RWSEM_INITIALIZER(name) \
94 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
95 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
96
97#define DECLARE_RWSEM(name) \
98 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
99
100static inline void init_rwsem(struct rw_semaphore *sem)
101{
102 sem->count = RWSEM_UNLOCKED_VALUE;
103 spin_lock_init(&sem->wait_lock);
104 INIT_LIST_HEAD(&sem->wait_list);
105}
106
107extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
108 struct lock_class_key *key);
109
110#define init_rwsem(sem) \
111do { \
112 static struct lock_class_key __key; \
113 \
114 __init_rwsem((sem), #sem, &__key); \
115} while (0)
116
117
118/*
119 * lock for reading 61 * lock for reading
120 */ 62 */
121static inline void __down_read(struct rw_semaphore *sem) 63static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
377 return new; 319 return new;
378} 320}
379 321
380static inline int rwsem_is_locked(struct rw_semaphore *sem)
381{
382 return (sem->count != 0);
383}
384
385#endif /* __KERNEL__ */ 322#endif /* __KERNEL__ */
386#endif /* _S390_RWSEM_H */ 323#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index d6b1ed0ec52b..2d9ea11f919a 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -83,8 +83,8 @@ struct uaccess_ops {
83 size_t (*clear_user)(size_t, void __user *); 83 size_t (*clear_user)(size_t, void __user *);
84 size_t (*strnlen_user)(size_t, const char __user *); 84 size_t (*strnlen_user)(size_t, const char __user *);
85 size_t (*strncpy_from_user)(size_t, const char __user *, char *); 85 size_t (*strncpy_from_user)(size_t, const char __user *, char *);
86 int (*futex_atomic_op)(int op, int __user *, int oparg, int *old); 86 int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old);
87 int (*futex_atomic_cmpxchg)(int __user *, int old, int new); 87 int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new);
88}; 88};
89 89
90extern struct uaccess_ops uaccess; 90extern struct uaccess_ops uaccess;
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
index 126011df14f1..1d2536cb630b 100644
--- a/arch/s390/lib/uaccess.h
+++ b/arch/s390/lib/uaccess.h
@@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *);
12extern size_t copy_to_user_std(size_t, void __user *, const void *); 12extern size_t copy_to_user_std(size_t, void __user *, const void *);
13extern size_t strnlen_user_std(size_t, const char __user *); 13extern size_t strnlen_user_std(size_t, const char __user *);
14extern size_t strncpy_from_user_std(size_t, const char __user *, char *); 14extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
15extern int futex_atomic_cmpxchg_std(int __user *, int, int); 15extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32);
16extern int futex_atomic_op_std(int, int __user *, int, int *); 16extern int futex_atomic_op_std(int, u32 __user *, int, int *);
17 17
18extern size_t copy_from_user_pt(size_t, const void __user *, void *); 18extern size_t copy_from_user_pt(size_t, const void __user *, void *);
19extern size_t copy_to_user_pt(size_t, void __user *, const void *); 19extern size_t copy_to_user_pt(size_t, void __user *, const void *);
20extern int futex_atomic_op_pt(int, int __user *, int, int *); 20extern int futex_atomic_op_pt(int, u32 __user *, int, int *);
21extern int futex_atomic_cmpxchg_pt(int __user *, int, int); 21extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32);
22 22
23#endif /* __ARCH_S390_LIB_UACCESS_H */ 23#endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 404f2de296dc..74833831417f 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,7 +302,7 @@ fault:
302 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ 302 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
303 "m" (*uaddr) : "cc" ); 303 "m" (*uaddr) : "cc" );
304 304
305static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) 305static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
306{ 306{
307 int oldval = 0, newval, ret; 307 int oldval = 0, newval, ret;
308 308
@@ -335,7 +335,7 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
335 return ret; 335 return ret;
336} 336}
337 337
338int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) 338int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
339{ 339{
340 int ret; 340 int ret;
341 341
@@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
354 return ret; 354 return ret;
355} 355}
356 356
357static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) 357static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
358 u32 oldval, u32 newval)
358{ 359{
359 int ret; 360 int ret;
360 361
361 asm volatile("0: cs %1,%4,0(%5)\n" 362 asm volatile("0: cs %1,%4,0(%5)\n"
362 "1: lr %0,%1\n" 363 "1: la %0,0\n"
363 "2:\n" 364 "2:\n"
364 EX_TABLE(0b,2b) EX_TABLE(1b,2b) 365 EX_TABLE(0b,2b) EX_TABLE(1b,2b)
365 : "=d" (ret), "+d" (oldval), "=m" (*uaddr) 366 : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
366 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) 367 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
367 : "cc", "memory" ); 368 : "cc", "memory" );
369 *uval = oldval;
368 return ret; 370 return ret;
369} 371}
370 372
371int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) 373int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
374 u32 oldval, u32 newval)
372{ 375{
373 int ret; 376 int ret;
374 377
375 if (segment_eq(get_fs(), KERNEL_DS)) 378 if (segment_eq(get_fs(), KERNEL_DS))
376 return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); 379 return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
377 spin_lock(&current->mm->page_table_lock); 380 spin_lock(&current->mm->page_table_lock);
378 uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); 381 uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
379 if (!uaddr) { 382 if (!uaddr) {
@@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
382 } 385 }
383 get_page(virt_to_page(uaddr)); 386 get_page(virt_to_page(uaddr));
384 spin_unlock(&current->mm->page_table_lock); 387 spin_unlock(&current->mm->page_table_lock);
385 ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); 388 ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
386 put_page(virt_to_page(uaddr)); 389 put_page(virt_to_page(uaddr));
387 return ret; 390 return ret;
388} 391}
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
index a6c4f7ed24a4..bb1a7eed42ce 100644
--- a/arch/s390/lib/uaccess_std.c
+++ b/arch/s390/lib/uaccess_std.c
@@ -255,7 +255,7 @@ size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
255 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ 255 : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
256 "m" (*uaddr) : "cc"); 256 "m" (*uaddr) : "cc");
257 257
258int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) 258int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old)
259{ 259{
260 int oldval = 0, newval, ret; 260 int oldval = 0, newval, ret;
261 261
@@ -287,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
287 return ret; 287 return ret;
288} 288}
289 289
290int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval) 290int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr,
291 u32 oldval, u32 newval)
291{ 292{
292 int ret; 293 int ret;
293 294
294 asm volatile( 295 asm volatile(
295 " sacf 256\n" 296 " sacf 256\n"
296 "0: cs %1,%4,0(%5)\n" 297 "0: cs %1,%4,0(%5)\n"
297 "1: lr %0,%1\n" 298 "1: la %0,0\n"
298 "2: sacf 0\n" 299 "2: sacf 0\n"
299 EX_TABLE(0b,2b) EX_TABLE(1b,2b) 300 EX_TABLE(0b,2b) EX_TABLE(1b,2b)
300 : "=d" (ret), "+d" (oldval), "=m" (*uaddr) 301 : "=d" (ret), "+d" (oldval), "=m" (*uaddr)
301 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) 302 : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
302 : "cc", "memory" ); 303 : "cc", "memory" );
304 *uval = oldval;
303 return ret; 305 return ret;
304} 306}
305 307
diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h
index a9f16a7f9aea..6cb9f193a95e 100644
--- a/arch/sh/include/asm/futex-irq.h
+++ b/arch/sh/include/asm/futex-irq.h
@@ -3,7 +3,7 @@
3 3
4#include <asm/system.h> 4#include <asm/system.h>
5 5
6static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, 6static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr,
7 int *oldval) 7 int *oldval)
8{ 8{
9 unsigned long flags; 9 unsigned long flags;
@@ -20,7 +20,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr,
20 return ret; 20 return ret;
21} 21}
22 22
23static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, 23static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr,
24 int *oldval) 24 int *oldval)
25{ 25{
26 unsigned long flags; 26 unsigned long flags;
@@ -37,7 +37,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr,
37 return ret; 37 return ret;
38} 38}
39 39
40static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, 40static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr,
41 int *oldval) 41 int *oldval)
42{ 42{
43 unsigned long flags; 43 unsigned long flags;
@@ -54,7 +54,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr,
54 return ret; 54 return ret;
55} 55}
56 56
57static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, 57static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr,
58 int *oldval) 58 int *oldval)
59{ 59{
60 unsigned long flags; 60 unsigned long flags;
@@ -71,7 +71,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr,
71 return ret; 71 return ret;
72} 72}
73 73
74static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, 74static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr,
75 int *oldval) 75 int *oldval)
76{ 76{
77 unsigned long flags; 77 unsigned long flags;
@@ -88,11 +88,13 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr,
88 return ret; 88 return ret;
89} 89}
90 90
91static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr, 91static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
92 int oldval, int newval) 92 u32 __user *uaddr,
93 u32 oldval, u32 newval)
93{ 94{
94 unsigned long flags; 95 unsigned long flags;
95 int ret, prev = 0; 96 int ret;
97 u32 prev = 0;
96 98
97 local_irq_save(flags); 99 local_irq_save(flags);
98 100
@@ -102,10 +104,8 @@ static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr,
102 104
103 local_irq_restore(flags); 105 local_irq_restore(flags);
104 106
105 if (ret) 107 *uval = prev;
106 return ret; 108 return ret;
107
108 return prev;
109} 109}
110 110
111#endif /* __ASM_SH_FUTEX_IRQ_H */ 111#endif /* __ASM_SH_FUTEX_IRQ_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 68256ec5fa35..7be39a646fbd 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -10,7 +10,7 @@
10/* XXX: UP variants, fix for SH-4A and SMP.. */ 10/* XXX: UP variants, fix for SH-4A and SMP.. */
11#include <asm/futex-irq.h> 11#include <asm/futex-irq.h>
12 12
13static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 13static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
14{ 14{
15 int op = (encoded_op >> 28) & 7; 15 int op = (encoded_op >> 28) & 7;
16 int cmp = (encoded_op >> 24) & 15; 16 int cmp = (encoded_op >> 24) & 15;
@@ -21,7 +21,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
21 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 21 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
22 oparg = 1 << oparg; 22 oparg = 1 << oparg;
23 23
24 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 24 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
25 return -EFAULT; 25 return -EFAULT;
26 26
27 pagefault_disable(); 27 pagefault_disable();
@@ -65,12 +65,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
65} 65}
66 66
67static inline int 67static inline int
68futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 68futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
69 u32 oldval, u32 newval)
69{ 70{
70 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 71 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
71 return -EFAULT; 72 return -EFAULT;
72 73
73 return atomic_futex_op_cmpxchg_inatomic(uaddr, oldval, newval); 74 return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
74} 75}
75 76
76#endif /* __KERNEL__ */ 77#endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
index 06e2251a5e48..edab57265293 100644
--- a/arch/sh/include/asm/rwsem.h
+++ b/arch/sh/include/asm/rwsem.h
@@ -11,64 +11,13 @@
11#endif 11#endif
12 12
13#ifdef __KERNEL__ 13#ifdef __KERNEL__
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <asm/atomic.h>
17#include <asm/system.h>
18 14
19/*
20 * the semaphore definition
21 */
22struct rw_semaphore {
23 long count;
24#define RWSEM_UNLOCKED_VALUE 0x00000000 15#define RWSEM_UNLOCKED_VALUE 0x00000000
25#define RWSEM_ACTIVE_BIAS 0x00000001 16#define RWSEM_ACTIVE_BIAS 0x00000001
26#define RWSEM_ACTIVE_MASK 0x0000ffff 17#define RWSEM_ACTIVE_MASK 0x0000ffff
27#define RWSEM_WAITING_BIAS (-0x00010000) 18#define RWSEM_WAITING_BIAS (-0x00010000)
28#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 19#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
29#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 20#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
30 spinlock_t wait_lock;
31 struct list_head wait_list;
32#ifdef CONFIG_DEBUG_LOCK_ALLOC
33 struct lockdep_map dep_map;
34#endif
35};
36
37#ifdef CONFIG_DEBUG_LOCK_ALLOC
38# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
39#else
40# define __RWSEM_DEP_MAP_INIT(lockname)
41#endif
42
43#define __RWSEM_INITIALIZER(name) \
44 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
45 LIST_HEAD_INIT((name).wait_list) \
46 __RWSEM_DEP_MAP_INIT(name) }
47
48#define DECLARE_RWSEM(name) \
49 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
50
51extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
52extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
53extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
54extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
55
56extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
57 struct lock_class_key *key);
58
59#define init_rwsem(sem) \
60do { \
61 static struct lock_class_key __key; \
62 \
63 __init_rwsem((sem), #sem, &__key); \
64} while (0)
65
66static inline void init_rwsem(struct rw_semaphore *sem)
67{
68 sem->count = RWSEM_UNLOCKED_VALUE;
69 spin_lock_init(&sem->wait_lock);
70 INIT_LIST_HEAD(&sem->wait_list);
71}
72 21
73/* 22/*
74 * lock for reading 23 * lock for reading
@@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
179 return atomic_add_return(delta, (atomic_t *)(&sem->count)); 128 return atomic_add_return(delta, (atomic_t *)(&sem->count));
180} 129}
181 130
182static inline int rwsem_is_locked(struct rw_semaphore *sem)
183{
184 return (sem->count != 0);
185}
186
187#endif /* __KERNEL__ */ 131#endif /* __KERNEL__ */
188#endif /* _ASM_SH_RWSEM_H */ 132#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sh/include/asm/sections.h b/arch/sh/include/asm/sections.h
index a78701da775b..4a5350037c8f 100644
--- a/arch/sh/include/asm/sections.h
+++ b/arch/sh/include/asm/sections.h
@@ -3,7 +3,7 @@
3 3
4#include <asm-generic/sections.h> 4#include <asm-generic/sections.h>
5 5
6extern void __nosave_begin, __nosave_end; 6extern long __nosave_begin, __nosave_end;
7extern long __machvec_start, __machvec_end; 7extern long __machvec_start, __machvec_end;
8extern char __uncached_start, __uncached_end; 8extern char __uncached_start, __uncached_end;
9extern char _ebss[]; 9extern char _ebss[];
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index 672944f5b19c..e53b4b38bd11 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -14,7 +14,7 @@
14#include <linux/io.h> 14#include <linux/io.h>
15#include <linux/sh_timer.h> 15#include <linux/sh_timer.h>
16#include <linux/serial_sci.h> 16#include <linux/serial_sci.h>
17#include <asm/machtypes.h> 17#include <generated/machtypes.h>
18 18
19static struct resource rtc_resources[] = { 19static struct resource rtc_resources[] = {
20 [0] = { 20 [0] = {
@@ -255,12 +255,17 @@ static struct platform_device *sh7750_early_devices[] __initdata = {
255 255
256void __init plat_early_device_setup(void) 256void __init plat_early_device_setup(void)
257{ 257{
258 struct platform_device *dev[1];
259
258 if (mach_is_rts7751r2d()) { 260 if (mach_is_rts7751r2d()) {
259 scif_platform_data.scscr |= SCSCR_CKE1; 261 scif_platform_data.scscr |= SCSCR_CKE1;
260 early_platform_add_devices(&scif_device, 1); 262 dev[0] = &scif_device;
263 early_platform_add_devices(dev, 1);
261 } else { 264 } else {
262 early_platform_add_devices(&sci_device, 1); 265 dev[0] = &sci_device;
263 early_platform_add_devices(&scif_device, 1); 266 early_platform_add_devices(dev, 1);
267 dev[0] = &scif_device;
268 early_platform_add_devices(dev, 1);
264 } 269 }
265 270
266 early_platform_add_devices(sh7750_early_devices, 271 early_platform_add_devices(sh7750_early_devices,
diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c
index faa8f86c0db4..0901b2f14e15 100644
--- a/arch/sh/lib/delay.c
+++ b/arch/sh/lib/delay.c
@@ -10,6 +10,16 @@
10void __delay(unsigned long loops) 10void __delay(unsigned long loops)
11{ 11{
12 __asm__ __volatile__( 12 __asm__ __volatile__(
13 /*
14 * ST40-300 appears to have an issue with this code,
15 * normally taking two cycles each loop, as with all
16 * other SH variants. If however the branch and the
17 * delay slot straddle an 8 byte boundary, this increases
18 * to 3 cycles.
19 * This align directive ensures this doesn't occur.
20 */
21 ".balign 8\n\t"
22
13 "tst %0, %0\n\t" 23 "tst %0, %0\n\t"
14 "1:\t" 24 "1:\t"
15 "bf/s 1b\n\t" 25 "bf/s 1b\n\t"
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 88d3dc3d30d5..5a580ea04429 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -108,7 +108,8 @@ void copy_user_highpage(struct page *to, struct page *from,
108 kunmap_atomic(vfrom, KM_USER0); 108 kunmap_atomic(vfrom, KM_USER0);
109 } 109 }
110 110
111 if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK)) 111 if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK) ||
112 (vma->vm_flags & VM_EXEC))
112 __flush_purge_region(vto, PAGE_SIZE); 113 __flush_purge_region(vto, PAGE_SIZE);
113 114
114 kunmap_atomic(vto, KM_USER1); 115 kunmap_atomic(vto, KM_USER1);
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
index 47f95839dc69..444e7bea23bc 100644
--- a/arch/sparc/include/asm/futex_64.h
+++ b/arch/sparc/include/asm/futex_64.h
@@ -30,7 +30,7 @@
30 : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ 30 : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
31 : "memory") 31 : "memory")
32 32
33static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 33static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
34{ 34{
35 int op = (encoded_op >> 28) & 7; 35 int op = (encoded_op >> 28) & 7;
36 int cmp = (encoded_op >> 24) & 15; 36 int cmp = (encoded_op >> 24) & 15;
@@ -38,7 +38,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
38 int cmparg = (encoded_op << 20) >> 20; 38 int cmparg = (encoded_op << 20) >> 20;
39 int oldval = 0, ret, tem; 39 int oldval = 0, ret, tem;
40 40
41 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))) 41 if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
42 return -EFAULT; 42 return -EFAULT;
43 if (unlikely((((unsigned long) uaddr) & 0x3UL))) 43 if (unlikely((((unsigned long) uaddr) & 0x3UL)))
44 return -EINVAL; 44 return -EINVAL;
@@ -85,26 +85,30 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
85} 85}
86 86
87static inline int 87static inline int
88futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 88futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
89 u32 oldval, u32 newval)
89{ 90{
91 int ret = 0;
92
90 __asm__ __volatile__( 93 __asm__ __volatile__(
91 "\n1: casa [%3] %%asi, %2, %0\n" 94 "\n1: casa [%4] %%asi, %3, %1\n"
92 "2:\n" 95 "2:\n"
93 " .section .fixup,#alloc,#execinstr\n" 96 " .section .fixup,#alloc,#execinstr\n"
94 " .align 4\n" 97 " .align 4\n"
95 "3: sethi %%hi(2b), %0\n" 98 "3: sethi %%hi(2b), %0\n"
96 " jmpl %0 + %%lo(2b), %%g0\n" 99 " jmpl %0 + %%lo(2b), %%g0\n"
97 " mov %4, %0\n" 100 " mov %5, %0\n"
98 " .previous\n" 101 " .previous\n"
99 " .section __ex_table,\"a\"\n" 102 " .section __ex_table,\"a\"\n"
100 " .align 4\n" 103 " .align 4\n"
101 " .word 1b, 3b\n" 104 " .word 1b, 3b\n"
102 " .previous\n" 105 " .previous\n"
103 : "=r" (newval) 106 : "+r" (ret), "=r" (newval)
104 : "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT) 107 : "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
105 : "memory"); 108 : "memory");
106 109
107 return newval; 110 *uval = newval;
111 return ret;
108} 112}
109 113
110#endif /* !(_SPARC64_FUTEX_H) */ 114#endif /* !(_SPARC64_FUTEX_H) */
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index a2b4302869bc..069bf4d663a1 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -13,53 +13,12 @@
13 13
14#ifdef __KERNEL__ 14#ifdef __KERNEL__
15 15
16#include <linux/list.h>
17#include <linux/spinlock.h>
18
19struct rwsem_waiter;
20
21struct rw_semaphore {
22 signed long count;
23#define RWSEM_UNLOCKED_VALUE 0x00000000L 16#define RWSEM_UNLOCKED_VALUE 0x00000000L
24#define RWSEM_ACTIVE_BIAS 0x00000001L 17#define RWSEM_ACTIVE_BIAS 0x00000001L
25#define RWSEM_ACTIVE_MASK 0xffffffffL 18#define RWSEM_ACTIVE_MASK 0xffffffffL
26#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) 19#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
27#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 20#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
28#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 21#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
29 spinlock_t wait_lock;
30 struct list_head wait_list;
31#ifdef CONFIG_DEBUG_LOCK_ALLOC
32 struct lockdep_map dep_map;
33#endif
34};
35
36#ifdef CONFIG_DEBUG_LOCK_ALLOC
37# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
38#else
39# define __RWSEM_DEP_MAP_INIT(lockname)
40#endif
41
42#define __RWSEM_INITIALIZER(name) \
43{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
44 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
45
46#define DECLARE_RWSEM(name) \
47 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
48
49extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
50extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
51extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
52extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
53
54extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
55 struct lock_class_key *key);
56
57#define init_rwsem(sem) \
58do { \
59 static struct lock_class_key __key; \
60 \
61 __init_rwsem((sem), #sem, &__key); \
62} while (0)
63 22
64/* 23/*
65 * lock for reading 24 * lock for reading
@@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
160 return atomic64_add_return(delta, (atomic64_t *)(&sem->count)); 119 return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
161} 120}
162 121
163static inline int rwsem_is_locked(struct rw_semaphore *sem)
164{
165 return (sem->count != 0);
166}
167
168#endif /* __KERNEL__ */ 122#endif /* __KERNEL__ */
169 123
170#endif /* _SPARC64_RWSEM_H */ 124#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index aeaa09a3c655..2cdc131b50ac 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -700,10 +700,8 @@ static void pcic_clear_clock_irq(void)
700 700
701static irqreturn_t pcic_timer_handler (int irq, void *h) 701static irqreturn_t pcic_timer_handler (int irq, void *h)
702{ 702{
703 write_seqlock(&xtime_lock); /* Dummy, to show that we remember */
704 pcic_clear_clock_irq(); 703 pcic_clear_clock_irq();
705 do_timer(1); 704 xtime_update(1);
706 write_sequnlock(&xtime_lock);
707#ifndef CONFIG_SMP 705#ifndef CONFIG_SMP
708 update_process_times(user_mode(get_irq_regs())); 706 update_process_times(user_mode(get_irq_regs()));
709#endif 707#endif
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 9c743b1886ff..4211bfc9bcad 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -85,7 +85,7 @@ int update_persistent_clock(struct timespec now)
85 85
86/* 86/*
87 * timer_interrupt() needs to keep up the real-time clock, 87 * timer_interrupt() needs to keep up the real-time clock,
88 * as well as call the "do_timer()" routine every clocktick 88 * as well as call the "xtime_update()" routine every clocktick
89 */ 89 */
90 90
91#define TICK_SIZE (tick_nsec / 1000) 91#define TICK_SIZE (tick_nsec / 1000)
@@ -96,14 +96,9 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id)
96 profile_tick(CPU_PROFILING); 96 profile_tick(CPU_PROFILING);
97#endif 97#endif
98 98
99 /* Protect counter clear so that do_gettimeoffset works */
100 write_seqlock(&xtime_lock);
101
102 clear_clock_irq(); 99 clear_clock_irq();
103 100
104 do_timer(1); 101 xtime_update(1);
105
106 write_sequnlock(&xtime_lock);
107 102
108#ifndef CONFIG_SMP 103#ifndef CONFIG_SMP
109 update_process_times(user_mode(get_irq_regs())); 104 update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index cbddeb38ffda..d3c7a12ad879 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -16,7 +16,7 @@
16#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)]) 16#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
17 17
18spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = { 18spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
19 [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED 19 [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
20}; 20};
21 21
22#else /* SMP */ 22#else /* SMP */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index fe0d10dcae57..d03ec124a598 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -29,16 +29,16 @@
29#include <linux/uaccess.h> 29#include <linux/uaccess.h>
30#include <linux/errno.h> 30#include <linux/errno.h>
31 31
32extern struct __get_user futex_set(int __user *v, int i); 32extern struct __get_user futex_set(u32 __user *v, int i);
33extern struct __get_user futex_add(int __user *v, int n); 33extern struct __get_user futex_add(u32 __user *v, int n);
34extern struct __get_user futex_or(int __user *v, int n); 34extern struct __get_user futex_or(u32 __user *v, int n);
35extern struct __get_user futex_andn(int __user *v, int n); 35extern struct __get_user futex_andn(u32 __user *v, int n);
36extern struct __get_user futex_cmpxchg(int __user *v, int o, int n); 36extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
37 37
38#ifndef __tilegx__ 38#ifndef __tilegx__
39extern struct __get_user futex_xor(int __user *v, int n); 39extern struct __get_user futex_xor(u32 __user *v, int n);
40#else 40#else
41static inline struct __get_user futex_xor(int __user *uaddr, int n) 41static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
42{ 42{
43 struct __get_user asm_ret = __get_user_4(uaddr); 43 struct __get_user asm_ret = __get_user_4(uaddr);
44 if (!asm_ret.err) { 44 if (!asm_ret.err) {
@@ -53,7 +53,7 @@ static inline struct __get_user futex_xor(int __user *uaddr, int n)
53} 53}
54#endif 54#endif
55 55
56static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 56static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
57{ 57{
58 int op = (encoded_op >> 28) & 7; 58 int op = (encoded_op >> 28) & 7;
59 int cmp = (encoded_op >> 24) & 15; 59 int cmp = (encoded_op >> 24) & 15;
@@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
65 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 65 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
66 oparg = 1 << oparg; 66 oparg = 1 << oparg;
67 67
68 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 68 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
69 return -EFAULT; 69 return -EFAULT;
70 70
71 pagefault_disable(); 71 pagefault_disable();
@@ -119,16 +119,17 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
119 return ret; 119 return ret;
120} 120}
121 121
122static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, 122static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
123 int newval) 123 u32 oldval, u32 newval)
124{ 124{
125 struct __get_user asm_ret; 125 struct __get_user asm_ret;
126 126
127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
128 return -EFAULT; 128 return -EFAULT;
129 129
130 asm_ret = futex_cmpxchg(uaddr, oldval, newval); 130 asm_ret = futex_cmpxchg(uaddr, oldval, newval);
131 return asm_ret.err ? asm_ret.err : asm_ret.val; 131 *uval = asm_ret.val;
132 return asm_ret.err;
132} 133}
133 134
134#ifndef __tilegx__ 135#ifndef __tilegx__
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index e351e14b4339..1e78940218c0 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
7 bool 7 bool
8 default y 8 default y
9 select HAVE_GENERIC_HARDIRQS 9 select HAVE_GENERIC_HARDIRQS
10 select GENERIC_HARDIRQS_NO_DEPRECATED
10 11
11config MMU 12config MMU
12 bool 13 bool
diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86
index 5ee328099c63..02fb017fed47 100644
--- a/arch/um/Kconfig.x86
+++ b/arch/um/Kconfig.x86
@@ -10,6 +10,8 @@ endmenu
10 10
11config UML_X86 11config UML_X86
12 def_bool y 12 def_bool y
13 select GENERIC_FIND_FIRST_BIT
14 select GENERIC_FIND_NEXT_BIT
13 15
14config 64BIT 16config 64BIT
15 bool 17 bool
@@ -19,6 +21,9 @@ config X86_32
19 def_bool !64BIT 21 def_bool !64BIT
20 select HAVE_AOUT 22 select HAVE_AOUT
21 23
24config X86_64
25 def_bool 64BIT
26
22config RWSEM_XCHGADD_ALGORITHM 27config RWSEM_XCHGADD_ALGORITHM
23 def_bool X86_XADD 28 def_bool X86_XADD
24 29
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 975613b23dcf..c70e047eed72 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req)
124#if 0 124#if 0
125void mconsole_proc(struct mc_request *req) 125void mconsole_proc(struct mc_request *req)
126{ 126{
127 struct nameidata nd;
128 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 127 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
129 struct file *file; 128 struct file *file;
130 int n, err; 129 int n;
131 char *ptr = req->request.data, *buf; 130 char *ptr = req->request.data, *buf;
132 mm_segment_t old_fs = get_fs(); 131 mm_segment_t old_fs = get_fs();
133 132
134 ptr += strlen("proc"); 133 ptr += strlen("proc");
135 ptr = skip_spaces(ptr); 134 ptr = skip_spaces(ptr);
136 135
137 err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd); 136 file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
138 if (err) {
139 mconsole_reply(req, "Failed to look up file", 1, 0);
140 goto out;
141 }
142
143 err = may_open(&nd.path, MAY_READ, O_RDONLY);
144 if (result) {
145 mconsole_reply(req, "Failed to open file", 1, 0);
146 path_put(&nd.path);
147 goto out;
148 }
149
150 file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
151 current_cred());
152 err = PTR_ERR(file);
153 if (IS_ERR(file)) { 137 if (IS_ERR(file)) {
154 mconsole_reply(req, "Failed to open file", 1, 0); 138 mconsole_reply(req, "Failed to open file", 1, 0);
155 path_put(&nd.path);
156 goto out; 139 goto out;
157 } 140 }
158 141
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ba4a98ba39c0..620f5b70957d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -185,7 +185,7 @@ struct ubd {
185 .no_cow = 0, \ 185 .no_cow = 0, \
186 .shared = 0, \ 186 .shared = 0, \
187 .cow = DEFAULT_COW, \ 187 .cow = DEFAULT_COW, \
188 .lock = SPIN_LOCK_UNLOCKED, \ 188 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
189 .request = NULL, \ 189 .request = NULL, \
190 .start_sg = 0, \ 190 .start_sg = 0, \
191 .end_sg = 0, \ 191 .end_sg = 0, \
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 3f0ac9e0c966..64cfea80cfe2 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -35,8 +35,10 @@ int show_interrupts(struct seq_file *p, void *v)
35 } 35 }
36 36
37 if (i < NR_IRQS) { 37 if (i < NR_IRQS) {
38 raw_spin_lock_irqsave(&irq_desc[i].lock, flags); 38 struct irq_desc *desc = irq_to_desc(i);
39 action = irq_desc[i].action; 39
40 raw_spin_lock_irqsave(&desc->lock, flags);
41 action = desc->action;
40 if (!action) 42 if (!action)
41 goto skip; 43 goto skip;
42 seq_printf(p, "%3d: ",i); 44 seq_printf(p, "%3d: ",i);
@@ -46,7 +48,7 @@ int show_interrupts(struct seq_file *p, void *v)
46 for_each_online_cpu(j) 48 for_each_online_cpu(j)
47 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 49 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
48#endif 50#endif
49 seq_printf(p, " %14s", irq_desc[i].chip->name); 51 seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
50 seq_printf(p, " %s", action->name); 52 seq_printf(p, " %s", action->name);
51 53
52 for (action=action->next; action; action = action->next) 54 for (action=action->next; action; action = action->next)
@@ -54,7 +56,7 @@ int show_interrupts(struct seq_file *p, void *v)
54 56
55 seq_putc(p, '\n'); 57 seq_putc(p, '\n');
56skip: 58skip:
57 raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); 59 raw_spin_unlock_irqrestore(&desc->lock, flags);
58 } else if (i == NR_IRQS) 60 } else if (i == NR_IRQS)
59 seq_putc(p, '\n'); 61 seq_putc(p, '\n');
60 62
@@ -360,10 +362,10 @@ EXPORT_SYMBOL(um_request_irq);
360EXPORT_SYMBOL(reactivate_fd); 362EXPORT_SYMBOL(reactivate_fd);
361 363
362/* 364/*
363 * irq_chip must define (startup || enable) && 365 * irq_chip must define at least enable/disable and ack when
364 * (shutdown || disable) && end 366 * the edge handler is used.
365 */ 367 */
366static void dummy(unsigned int irq) 368static void dummy(struct irq_data *d)
367{ 369{
368} 370}
369 371
@@ -371,20 +373,17 @@ static void dummy(unsigned int irq)
371static struct irq_chip normal_irq_type = { 373static struct irq_chip normal_irq_type = {
372 .name = "SIGIO", 374 .name = "SIGIO",
373 .release = free_irq_by_irq_and_dev, 375 .release = free_irq_by_irq_and_dev,
374 .disable = dummy, 376 .irq_disable = dummy,
375 .enable = dummy, 377 .irq_enable = dummy,
376 .ack = dummy, 378 .irq_ack = dummy,
377 .end = dummy
378}; 379};
379 380
380static struct irq_chip SIGVTALRM_irq_type = { 381static struct irq_chip SIGVTALRM_irq_type = {
381 .name = "SIGVTALRM", 382 .name = "SIGVTALRM",
382 .release = free_irq_by_irq_and_dev, 383 .release = free_irq_by_irq_and_dev,
383 .shutdown = dummy, /* never called */ 384 .irq_disable = dummy,
384 .disable = dummy, 385 .irq_enable = dummy,
385 .enable = dummy, 386 .irq_ack = dummy,
386 .ack = dummy,
387 .end = dummy
388}; 387};
389 388
390void __init init_IRQ(void) 389void __init init_IRQ(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b4c2e9c67623..f8958b01b975 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -64,8 +64,12 @@ config X86
64 select HAVE_TEXT_POKE_SMP 64 select HAVE_TEXT_POKE_SMP
65 select HAVE_GENERIC_HARDIRQS 65 select HAVE_GENERIC_HARDIRQS
66 select HAVE_SPARSE_IRQ 66 select HAVE_SPARSE_IRQ
67 select GENERIC_FIND_FIRST_BIT
68 select GENERIC_FIND_NEXT_BIT
67 select GENERIC_IRQ_PROBE 69 select GENERIC_IRQ_PROBE
68 select GENERIC_PENDING_IRQ if SMP 70 select GENERIC_PENDING_IRQ if SMP
71 select GENERIC_IRQ_SHOW
72 select IRQ_FORCED_THREADING
69 select USE_GENERIC_SMP_HELPERS if SMP 73 select USE_GENERIC_SMP_HELPERS if SMP
70 74
71config INSTRUCTION_DECODER 75config INSTRUCTION_DECODER
@@ -1707,7 +1711,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
1707 depends on NUMA 1711 depends on NUMA
1708 1712
1709config USE_PERCPU_NUMA_NODE_ID 1713config USE_PERCPU_NUMA_NODE_ID
1710 def_bool X86_64 1714 def_bool y
1711 depends on NUMA 1715 depends on NUMA
1712 1716
1713menu "Power management and ACPI options" 1717menu "Power management and ACPI options"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 283c5a6a03a6..ed47e6e1747f 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -294,11 +294,6 @@ config X86_GENERIC
294 294
295endif 295endif
296 296
297config X86_CPU
298 def_bool y
299 select GENERIC_FIND_FIRST_BIT
300 select GENERIC_FIND_NEXT_BIT
301
302# 297#
303# Define implied options from the CPU selection here 298# Define implied options from the CPU selection here
304config X86_INTERNODE_CACHE_SHIFT 299config X86_INTERNODE_CACHE_SHIFT
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 646aa78ba5fd..46a823882437 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -62,7 +62,12 @@ int main(int argc, char *argv[])
62 if (fseek(f, -4L, SEEK_END)) { 62 if (fseek(f, -4L, SEEK_END)) {
63 perror(argv[1]); 63 perror(argv[1]);
64 } 64 }
65 fread(&olen, sizeof olen, 1, f); 65
66 if (fread(&olen, sizeof(olen), 1, f) != 1) {
67 perror(argv[1]);
68 return 1;
69 }
70
66 ilen = ftell(f); 71 ilen = ftell(f);
67 olen = getle32(&olen); 72 olen = getle32(&olen);
68 fclose(f); 73 fclose(f);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 518bb99c3394..430312ba6e3f 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -25,6 +25,8 @@
25#define sysretl_audit ia32_ret_from_sys_call 25#define sysretl_audit ia32_ret_from_sys_call
26#endif 26#endif
27 27
28 .section .entry.text, "ax"
29
28#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) 30#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
29 31
30 .macro IA32_ARG_FIXUP noebp=0 32 .macro IA32_ARG_FIXUP noebp=0
@@ -126,26 +128,20 @@ ENTRY(ia32_sysenter_target)
126 */ 128 */
127 ENABLE_INTERRUPTS(CLBR_NONE) 129 ENABLE_INTERRUPTS(CLBR_NONE)
128 movl %ebp,%ebp /* zero extension */ 130 movl %ebp,%ebp /* zero extension */
129 pushq $__USER32_DS 131 pushq_cfi $__USER32_DS
130 CFI_ADJUST_CFA_OFFSET 8
131 /*CFI_REL_OFFSET ss,0*/ 132 /*CFI_REL_OFFSET ss,0*/
132 pushq %rbp 133 pushq_cfi %rbp
133 CFI_ADJUST_CFA_OFFSET 8
134 CFI_REL_OFFSET rsp,0 134 CFI_REL_OFFSET rsp,0
135 pushfq 135 pushfq_cfi
136 CFI_ADJUST_CFA_OFFSET 8
137 /*CFI_REL_OFFSET rflags,0*/ 136 /*CFI_REL_OFFSET rflags,0*/
138 movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d 137 movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
139 CFI_REGISTER rip,r10 138 CFI_REGISTER rip,r10
140 pushq $__USER32_CS 139 pushq_cfi $__USER32_CS
141 CFI_ADJUST_CFA_OFFSET 8
142 /*CFI_REL_OFFSET cs,0*/ 140 /*CFI_REL_OFFSET cs,0*/
143 movl %eax, %eax 141 movl %eax, %eax
144 pushq %r10 142 pushq_cfi %r10
145 CFI_ADJUST_CFA_OFFSET 8
146 CFI_REL_OFFSET rip,0 143 CFI_REL_OFFSET rip,0
147 pushq %rax 144 pushq_cfi %rax
148 CFI_ADJUST_CFA_OFFSET 8
149 cld 145 cld
150 SAVE_ARGS 0,0,1 146 SAVE_ARGS 0,0,1
151 /* no need to do an access_ok check here because rbp has been 147 /* no need to do an access_ok check here because rbp has been
@@ -182,11 +178,9 @@ sysexit_from_sys_call:
182 xorq %r9,%r9 178 xorq %r9,%r9
183 xorq %r10,%r10 179 xorq %r10,%r10
184 xorq %r11,%r11 180 xorq %r11,%r11
185 popfq 181 popfq_cfi
186 CFI_ADJUST_CFA_OFFSET -8
187 /*CFI_RESTORE rflags*/ 182 /*CFI_RESTORE rflags*/
188 popq %rcx /* User %esp */ 183 popq_cfi %rcx /* User %esp */
189 CFI_ADJUST_CFA_OFFSET -8
190 CFI_REGISTER rsp,rcx 184 CFI_REGISTER rsp,rcx
191 TRACE_IRQS_ON 185 TRACE_IRQS_ON
192 ENABLE_INTERRUPTS_SYSEXIT32 186 ENABLE_INTERRUPTS_SYSEXIT32
@@ -421,8 +415,7 @@ ENTRY(ia32_syscall)
421 */ 415 */
422 ENABLE_INTERRUPTS(CLBR_NONE) 416 ENABLE_INTERRUPTS(CLBR_NONE)
423 movl %eax,%eax 417 movl %eax,%eax
424 pushq %rax 418 pushq_cfi %rax
425 CFI_ADJUST_CFA_OFFSET 8
426 cld 419 cld
427 /* note the registers are not zero extended to the sf. 420 /* note the registers are not zero extended to the sf.
428 this could be a problem. */ 421 this could be a problem. */
@@ -851,4 +844,7 @@ ia32_sys_call_table:
851 .quad sys_fanotify_init 844 .quad sys_fanotify_init
852 .quad sys32_fanotify_mark 845 .quad sys32_fanotify_mark
853 .quad sys_prlimit64 /* 340 */ 846 .quad sys_prlimit64 /* 340 */
847 .quad sys_name_to_handle_at
848 .quad compat_sys_open_by_handle_at
849 .quad compat_sys_clock_adjtime
854ia32_syscall_end: 850ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 211ca3f7fd16..b964ec457546 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -88,6 +88,7 @@ extern int acpi_disabled;
88extern int acpi_pci_disabled; 88extern int acpi_pci_disabled;
89extern int acpi_skip_timer_override; 89extern int acpi_skip_timer_override;
90extern int acpi_use_timer_override; 90extern int acpi_use_timer_override;
91extern int acpi_fix_pin2_polarity;
91 92
92extern u8 acpi_sci_flags; 93extern u8 acpi_sci_flags;
93extern int acpi_sci_override_gsi; 94extern int acpi_sci_override_gsi;
@@ -185,15 +186,7 @@ struct bootnode;
185 186
186#ifdef CONFIG_ACPI_NUMA 187#ifdef CONFIG_ACPI_NUMA
187extern int acpi_numa; 188extern int acpi_numa;
188extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start, 189extern int x86_acpi_numa_init(void);
189 unsigned long end);
190extern int acpi_scan_nodes(unsigned long start, unsigned long end);
191#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
192
193#ifdef CONFIG_NUMA_EMU
194extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
195 int num_nodes);
196#endif
197#endif /* CONFIG_ACPI_NUMA */ 190#endif /* CONFIG_ACPI_NUMA */
198 191
199#define acpi_unlazy_tlb(x) leave_mm(x) 192#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 64dc82ee19f0..e264ae5a1443 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -9,23 +9,20 @@ struct amd_nb_bus_dev_range {
9 u8 dev_limit; 9 u8 dev_limit;
10}; 10};
11 11
12extern struct pci_device_id amd_nb_misc_ids[]; 12extern const struct pci_device_id amd_nb_misc_ids[];
13extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; 13extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
14struct bootnode; 14struct bootnode;
15 15
16extern int early_is_amd_nb(u32 value); 16extern int early_is_amd_nb(u32 value);
17extern int amd_cache_northbridges(void); 17extern int amd_cache_northbridges(void);
18extern void amd_flush_garts(void); 18extern void amd_flush_garts(void);
19extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); 19extern int amd_numa_init(void);
20extern int amd_scan_nodes(void); 20extern int amd_get_subcaches(int);
21 21extern int amd_set_subcaches(int, int);
22#ifdef CONFIG_NUMA_EMU
23extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
24extern void amd_get_nodes(struct bootnode *nodes);
25#endif
26 22
27struct amd_northbridge { 23struct amd_northbridge {
28 struct pci_dev *misc; 24 struct pci_dev *misc;
25 struct pci_dev *link;
29}; 26};
30 27
31struct amd_northbridge_info { 28struct amd_northbridge_info {
@@ -37,6 +34,7 @@ extern struct amd_northbridge_info amd_northbridges;
37 34
38#define AMD_NB_GART 0x1 35#define AMD_NB_GART 0x1
39#define AMD_NB_L3_INDEX_DISABLE 0x2 36#define AMD_NB_L3_INDEX_DISABLE 0x2
37#define AMD_NB_L3_PARTITIONING 0x4
40 38
41#ifdef CONFIG_AMD_NB 39#ifdef CONFIG_AMD_NB
42 40
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5b7d5137e167..a279d98ea95e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -303,8 +303,6 @@ struct apic {
303 303
304 void (*setup_apic_routing)(void); 304 void (*setup_apic_routing)(void);
305 int (*multi_timer_check)(int apic, int irq); 305 int (*multi_timer_check)(int apic, int irq);
306 int (*apicid_to_node)(int logical_apicid);
307 int (*cpu_to_logical_apicid)(int cpu);
308 int (*cpu_present_to_apicid)(int mps_cpu); 306 int (*cpu_present_to_apicid)(int mps_cpu);
309 void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); 307 void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
310 void (*setup_portio_remap)(void); 308 void (*setup_portio_remap)(void);
@@ -352,6 +350,23 @@ struct apic {
352 void (*icr_write)(u32 low, u32 high); 350 void (*icr_write)(u32 low, u32 high);
353 void (*wait_icr_idle)(void); 351 void (*wait_icr_idle)(void);
354 u32 (*safe_wait_icr_idle)(void); 352 u32 (*safe_wait_icr_idle)(void);
353
354#ifdef CONFIG_X86_32
355 /*
356 * Called very early during boot from get_smp_config(). It should
357 * return the logical apicid. x86_[bios]_cpu_to_apicid is
358 * initialized before this function is called.
359 *
360 * If logical apicid can't be determined that early, the function
361 * may return BAD_APICID. Logical apicid will be configured after
362 * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity
363 * won't be applied properly during early boot in this case.
364 */
365 int (*x86_32_early_logical_apicid)(int cpu);
366
367 /* determine CPU -> NUMA node mapping */
368 int (*x86_32_numa_cpu_node)(int cpu);
369#endif
355}; 370};
356 371
357/* 372/*
@@ -499,6 +514,11 @@ extern struct apic apic_noop;
499 514
500extern struct apic apic_default; 515extern struct apic apic_default;
501 516
517static inline int noop_x86_32_early_logical_apicid(int cpu)
518{
519 return BAD_APICID;
520}
521
502/* 522/*
503 * Set up the logical destination ID. 523 * Set up the logical destination ID.
504 * 524 *
@@ -518,7 +538,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
518 return cpuid_apic >> index_msb; 538 return cpuid_apic >> index_msb;
519} 539}
520 540
521extern int default_apicid_to_node(int logical_apicid); 541extern int default_x86_32_numa_cpu_node(int cpu);
522 542
523#endif 543#endif
524 544
@@ -554,12 +574,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma
554 *retmap = *phys_map; 574 *retmap = *phys_map;
555} 575}
556 576
557/* Mapping from cpu number to logical apicid */
558static inline int default_cpu_to_logical_apicid(int cpu)
559{
560 return 1 << cpu;
561}
562
563static inline int __default_cpu_present_to_apicid(int mps_cpu) 577static inline int __default_cpu_present_to_apicid(int mps_cpu)
564{ 578{
565 if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) 579 if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
@@ -592,8 +606,4 @@ extern int default_check_phys_apicid_present(int phys_apicid);
592 606
593#endif /* CONFIG_X86_LOCAL_APIC */ 607#endif /* CONFIG_X86_LOCAL_APIC */
594 608
595#ifdef CONFIG_X86_32
596extern u8 cpu_2_logical_apicid[NR_CPUS];
597#endif
598
599#endif /* _ASM_X86_APIC_H */ 609#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
new file mode 100644
index 000000000000..e656ad8c0a2e
--- /dev/null
+++ b/arch/x86/include/asm/ce4100.h
@@ -0,0 +1,6 @@
1#ifndef _ASM_CE4100_H_
2#define _ASM_CE4100_H_
3
4int ce4100_pci_init(void);
5
6#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 220e2ea08e80..91f3e087cf21 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -160,6 +160,7 @@
160#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ 160#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
161#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ 161#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
162#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ 162#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
163#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
163 164
164/* 165/*
165 * Auxiliary flags: Linux defined - For features scattered in various 166 * Auxiliary flags: Linux defined - For features scattered in various
@@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32];
279#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) 280#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
280#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 281#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
281#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) 282#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
283#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
282 284
283#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 285#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
284# define cpu_has_invlpg 1 286# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 57650ab4a5f5..1cd6d26a0a8d 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -16,10 +16,13 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) 17BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
18 18
19.irpc idx, "01234567" 19.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
20 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
21.if NUM_INVALIDATE_TLB_VECTORS > \idx
20BUILD_INTERRUPT3(invalidate_interrupt\idx, 22BUILD_INTERRUPT3(invalidate_interrupt\idx,
21 (INVALIDATE_TLB_VECTOR_START)+\idx, 23 (INVALIDATE_TLB_VECTOR_START)+\idx,
22 smp_invalidate_interrupt) 24 smp_invalidate_interrupt)
25.endif
23.endr 26.endr
24#endif 27#endif
25 28
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 06850a7194e1..2c6fc9e62812 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -7,14 +7,12 @@
7 frame pointer later */ 7 frame pointer later */
8#ifdef CONFIG_FRAME_POINTER 8#ifdef CONFIG_FRAME_POINTER
9 .macro FRAME 9 .macro FRAME
10 pushl %ebp 10 pushl_cfi %ebp
11 CFI_ADJUST_CFA_OFFSET 4
12 CFI_REL_OFFSET ebp,0 11 CFI_REL_OFFSET ebp,0
13 movl %esp,%ebp 12 movl %esp,%ebp
14 .endm 13 .endm
15 .macro ENDFRAME 14 .macro ENDFRAME
16 popl %ebp 15 popl_cfi %ebp
17 CFI_ADJUST_CFA_OFFSET -4
18 CFI_RESTORE ebp 16 CFI_RESTORE ebp
19 .endm 17 .endm
20#else 18#else
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 1f11ce44e956..d09bb03653f0 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -37,7 +37,7 @@
37 "+m" (*uaddr), "=&r" (tem) \ 37 "+m" (*uaddr), "=&r" (tem) \
38 : "r" (oparg), "i" (-EFAULT), "1" (0)) 38 : "r" (oparg), "i" (-EFAULT), "1" (0))
39 39
40static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) 40static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
41{ 41{
42 int op = (encoded_op >> 28) & 7; 42 int op = (encoded_op >> 28) & 7;
43 int cmp = (encoded_op >> 24) & 15; 43 int cmp = (encoded_op >> 24) & 15;
@@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
48 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 48 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
49 oparg = 1 << oparg; 49 oparg = 1 << oparg;
50 50
51 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 51 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
52 return -EFAULT; 52 return -EFAULT;
53 53
54#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) 54#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
@@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
109 return ret; 109 return ret;
110} 110}
111 111
112static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, 112static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
113 int newval) 113 u32 oldval, u32 newval)
114{ 114{
115 int ret = 0;
115 116
116#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) 117#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
117 /* Real i386 machines have no cmpxchg instruction */ 118 /* Real i386 machines have no cmpxchg instruction */
@@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
119 return -ENOSYS; 120 return -ENOSYS;
120#endif 121#endif
121 122
122 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) 123 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
123 return -EFAULT; 124 return -EFAULT;
124 125
125 asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" 126 asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
126 "2:\t.section .fixup, \"ax\"\n" 127 "2:\t.section .fixup, \"ax\"\n"
127 "3:\tmov %2, %0\n" 128 "3:\tmov %3, %0\n"
128 "\tjmp 2b\n" 129 "\tjmp 2b\n"
129 "\t.previous\n" 130 "\t.previous\n"
130 _ASM_EXTABLE(1b, 3b) 131 _ASM_EXTABLE(1b, 3b)
131 : "=a" (oldval), "+m" (*uaddr) 132 : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
132 : "i" (-EFAULT), "r" (newval), "0" (oldval) 133 : "i" (-EFAULT), "r" (newval), "1" (oldval)
133 : "memory" 134 : "memory"
134 ); 135 );
135 136
136 return oldval; 137 *uval = oldval;
138 return ret;
137} 139}
138 140
139#endif 141#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 0274ec5a7e62..bb9efe8706e2 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -45,6 +45,30 @@ extern void invalidate_interrupt4(void);
45extern void invalidate_interrupt5(void); 45extern void invalidate_interrupt5(void);
46extern void invalidate_interrupt6(void); 46extern void invalidate_interrupt6(void);
47extern void invalidate_interrupt7(void); 47extern void invalidate_interrupt7(void);
48extern void invalidate_interrupt8(void);
49extern void invalidate_interrupt9(void);
50extern void invalidate_interrupt10(void);
51extern void invalidate_interrupt11(void);
52extern void invalidate_interrupt12(void);
53extern void invalidate_interrupt13(void);
54extern void invalidate_interrupt14(void);
55extern void invalidate_interrupt15(void);
56extern void invalidate_interrupt16(void);
57extern void invalidate_interrupt17(void);
58extern void invalidate_interrupt18(void);
59extern void invalidate_interrupt19(void);
60extern void invalidate_interrupt20(void);
61extern void invalidate_interrupt21(void);
62extern void invalidate_interrupt22(void);
63extern void invalidate_interrupt23(void);
64extern void invalidate_interrupt24(void);
65extern void invalidate_interrupt25(void);
66extern void invalidate_interrupt26(void);
67extern void invalidate_interrupt27(void);
68extern void invalidate_interrupt28(void);
69extern void invalidate_interrupt29(void);
70extern void invalidate_interrupt30(void);
71extern void invalidate_interrupt31(void);
48 72
49extern void irq_move_cleanup_interrupt(void); 73extern void irq_move_cleanup_interrupt(void);
50extern void reboot_interrupt(void); 74extern void reboot_interrupt(void);
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 36fb1a6a5109..8dbe353e41e1 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -11,8 +11,8 @@ kernel_physical_mapping_init(unsigned long start,
11 unsigned long page_size_mask); 11 unsigned long page_size_mask);
12 12
13 13
14extern unsigned long __initdata e820_table_start; 14extern unsigned long __initdata pgt_buf_start;
15extern unsigned long __meminitdata e820_table_end; 15extern unsigned long __meminitdata pgt_buf_end;
16extern unsigned long __meminitdata e820_table_top; 16extern unsigned long __meminitdata pgt_buf_top;
17 17
18#endif /* _ASM_X86_INIT_32_H */ 18#endif /* _ASM_X86_INIT_32_H */
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b7228268a63..615fa9061b57 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
123 int vector); 123 int vector);
124extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, 124extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
125 int vector); 125 int vector);
126extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
127 int vector);
128extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
129 int vector);
130 126
131/* Avoid include hell */ 127/* Avoid include hell */
132#define NMI_VECTOR 0x02 128#define NMI_VECTOR 0x02
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector)
150} 146}
151 147
152#ifdef CONFIG_X86_32 148#ifdef CONFIG_X86_32
149extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
150 int vector);
151extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
152 int vector);
153extern void default_send_IPI_mask_logical(const struct cpumask *mask, 153extern void default_send_IPI_mask_logical(const struct cpumask *mask,
154 int vector); 154 int vector);
155extern void default_send_IPI_allbutself(int vector); 155extern void default_send_IPI_allbutself(int vector);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6af0894dafb4..6e976ee3b3ef 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -1,6 +1,7 @@
1#ifndef _ASM_X86_IRQ_VECTORS_H 1#ifndef _ASM_X86_IRQ_VECTORS_H
2#define _ASM_X86_IRQ_VECTORS_H 2#define _ASM_X86_IRQ_VECTORS_H
3 3
4#include <linux/threads.h>
4/* 5/*
5 * Linux IRQ vector layout. 6 * Linux IRQ vector layout.
6 * 7 *
@@ -16,8 +17,8 @@
16 * Vectors 0 ... 31 : system traps and exceptions - hardcoded events 17 * Vectors 0 ... 31 : system traps and exceptions - hardcoded events
17 * Vectors 32 ... 127 : device interrupts 18 * Vectors 32 ... 127 : device interrupts
18 * Vector 128 : legacy int80 syscall interface 19 * Vector 128 : legacy int80 syscall interface
19 * Vectors 129 ... 237 : device interrupts 20 * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts
20 * Vectors 238 ... 255 : special interrupts 21 * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
21 * 22 *
22 * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. 23 * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
23 * 24 *
@@ -96,37 +97,43 @@
96#define THRESHOLD_APIC_VECTOR 0xf9 97#define THRESHOLD_APIC_VECTOR 0xf9
97#define REBOOT_VECTOR 0xf8 98#define REBOOT_VECTOR 0xf8
98 99
99/* f0-f7 used for spreading out TLB flushes: */
100#define INVALIDATE_TLB_VECTOR_END 0xf7
101#define INVALIDATE_TLB_VECTOR_START 0xf0
102#define NUM_INVALIDATE_TLB_VECTORS 8
103
104/*
105 * Local APIC timer IRQ vector is on a different priority level,
106 * to work around the 'lost local interrupt if more than 2 IRQ
107 * sources per level' errata.
108 */
109#define LOCAL_TIMER_VECTOR 0xef
110
111/* 100/*
112 * Generic system vector for platform specific use 101 * Generic system vector for platform specific use
113 */ 102 */
114#define X86_PLATFORM_IPI_VECTOR 0xed 103#define X86_PLATFORM_IPI_VECTOR 0xf7
115 104
116/* 105/*
117 * IRQ work vector: 106 * IRQ work vector:
118 */ 107 */
119#define IRQ_WORK_VECTOR 0xec 108#define IRQ_WORK_VECTOR 0xf6
120 109
121#define UV_BAU_MESSAGE 0xea 110#define UV_BAU_MESSAGE 0xf5
122 111
123/* 112/*
124 * Self IPI vector for machine checks 113 * Self IPI vector for machine checks
125 */ 114 */
126#define MCE_SELF_VECTOR 0xeb 115#define MCE_SELF_VECTOR 0xf4
127 116
128/* Xen vector callback to receive events in a HVM domain */ 117/* Xen vector callback to receive events in a HVM domain */
129#define XEN_HVM_EVTCHN_CALLBACK 0xe9 118#define XEN_HVM_EVTCHN_CALLBACK 0xf3
119
120/*
121 * Local APIC timer IRQ vector is on a different priority level,
122 * to work around the 'lost local interrupt if more than 2 IRQ
123 * sources per level' errata.
124 */
125#define LOCAL_TIMER_VECTOR 0xef
126
127/* up to 32 vectors used for spreading out TLB flushes: */
128#if NR_CPUS <= 32
129# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS)
130#else
131# define NUM_INVALIDATE_TLB_VECTORS (32)
132#endif
133
134#define INVALIDATE_TLB_VECTOR_END (0xee)
135#define INVALIDATE_TLB_VECTOR_START \
136 (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
130 137
131#define NR_VECTORS 256 138#define NR_VECTORS 256
132 139
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index ca242d35e873..518bbbb9ee59 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -13,7 +13,6 @@ enum die_val {
13 DIE_PANIC, 13 DIE_PANIC,
14 DIE_NMI, 14 DIE_NMI,
15 DIE_DIE, 15 DIE_DIE,
16 DIE_NMIWATCHDOG,
17 DIE_KERNELDEBUG, 16 DIE_KERNELDEBUG,
18 DIE_TRAP, 17 DIE_TRAP,
19 DIE_GPF, 18 DIE_GPF,
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 0c90dd9f0505..9c7d95f6174b 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,7 +25,6 @@ extern int pic_mode;
25#define MAX_IRQ_SOURCES 256 25#define MAX_IRQ_SOURCES 256
26 26
27extern unsigned int def_to_bigsmp; 27extern unsigned int def_to_bigsmp;
28extern u8 apicid_2_node[];
29 28
30#ifdef CONFIG_X86_NUMAQ 29#ifdef CONFIG_X86_NUMAQ
31extern int mp_bus_id_to_node[MAX_MP_BUSSES]; 30extern int mp_bus_id_to_node[MAX_MP_BUSSES];
@@ -33,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES];
33extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; 32extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
34#endif 33#endif
35 34
36#define MAX_APICID 256
37
38#else /* CONFIG_X86_64: */ 35#else /* CONFIG_X86_64: */
39 36
40#define MAX_MP_BUSSES 256 37#define MAX_MP_BUSSES 256
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0d998e..823d48223400 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,6 +36,11 @@
36#define MSR_IA32_PERFCTR1 0x000000c2 36#define MSR_IA32_PERFCTR1 0x000000c2
37#define MSR_FSB_FREQ 0x000000cd 37#define MSR_FSB_FREQ 0x000000cd
38 38
39#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
40#define NHM_C3_AUTO_DEMOTE (1UL << 25)
41#define NHM_C1_AUTO_DEMOTE (1UL << 26)
42#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
43
39#define MSR_MTRRcap 0x000000fe 44#define MSR_MTRRcap 0x000000fe
40#define MSR_IA32_BBL_CR_CTL 0x00000119 45#define MSR_IA32_BBL_CR_CTL 0x00000119
41 46
@@ -47,6 +52,9 @@
47#define MSR_IA32_MCG_STATUS 0x0000017a 52#define MSR_IA32_MCG_STATUS 0x0000017a
48#define MSR_IA32_MCG_CTL 0x0000017b 53#define MSR_IA32_MCG_CTL 0x0000017b
49 54
55#define MSR_OFFCORE_RSP_0 0x000001a6
56#define MSR_OFFCORE_RSP_1 0x000001a7
57
50#define MSR_IA32_PEBS_ENABLE 0x000003f1 58#define MSR_IA32_PEBS_ENABLE 0x000003f1
51#define MSR_IA32_DS_AREA 0x00000600 59#define MSR_IA32_DS_AREA 0x00000600
52#define MSR_IA32_PERF_CAPABILITIES 0x00000345 60#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c76f5b92b840..07f46016d3ff 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -7,7 +7,6 @@
7 7
8#ifdef CONFIG_X86_LOCAL_APIC 8#ifdef CONFIG_X86_LOCAL_APIC
9 9
10extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
11extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 10extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
12extern int reserve_perfctr_nmi(unsigned int); 11extern int reserve_perfctr_nmi(unsigned int);
13extern void release_perfctr_nmi(unsigned int); 12extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400d3138..3d4dab43c994 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,57 @@
1#ifndef _ASM_X86_NUMA_H
2#define _ASM_X86_NUMA_H
3
4#include <asm/topology.h>
5#include <asm/apicdef.h>
6
7#ifdef CONFIG_NUMA
8
9#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
10
11/*
12 * __apicid_to_node[] stores the raw mapping between physical apicid and
13 * node and is used to initialize cpu_to_node mapping.
14 *
15 * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
16 * should be accessed by the accessors - set_apicid_to_node() and
17 * numa_cpu_node().
18 */
19extern s16 __apicid_to_node[MAX_LOCAL_APIC];
20
21static inline void set_apicid_to_node(int apicid, s16 node)
22{
23 __apicid_to_node[apicid] = node;
24}
25#else /* CONFIG_NUMA */
26static inline void set_apicid_to_node(int apicid, s16 node)
27{
28}
29#endif /* CONFIG_NUMA */
30
1#ifdef CONFIG_X86_32 31#ifdef CONFIG_X86_32
2# include "numa_32.h" 32# include "numa_32.h"
3#else 33#else
4# include "numa_64.h" 34# include "numa_64.h"
5#endif 35#endif
36
37#ifdef CONFIG_NUMA
38extern void __cpuinit numa_set_node(int cpu, int node);
39extern void __cpuinit numa_clear_node(int cpu);
40extern void __init numa_init_array(void);
41extern void __init init_cpu_to_node(void);
42extern void __cpuinit numa_add_cpu(int cpu);
43extern void __cpuinit numa_remove_cpu(int cpu);
44#else /* CONFIG_NUMA */
45static inline void numa_set_node(int cpu, int node) { }
46static inline void numa_clear_node(int cpu) { }
47static inline void numa_init_array(void) { }
48static inline void init_cpu_to_node(void) { }
49static inline void numa_add_cpu(int cpu) { }
50static inline void numa_remove_cpu(int cpu) { }
51#endif /* CONFIG_NUMA */
52
53#ifdef CONFIG_DEBUG_PER_CPU_MAPS
54struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
55#endif
56
57#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index b0ef2b449a9d..c6beed1ef103 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,7 +4,12 @@
4extern int numa_off; 4extern int numa_off;
5 5
6extern int pxm_to_nid(int pxm); 6extern int pxm_to_nid(int pxm);
7extern void numa_remove_cpu(int cpu); 7
8#ifdef CONFIG_NUMA
9extern int __cpuinit numa_cpu_node(int cpu);
10#else /* CONFIG_NUMA */
11static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
12#endif /* CONFIG_NUMA */
8 13
9#ifdef CONFIG_HIGHMEM 14#ifdef CONFIG_HIGHMEM
10extern void set_highmem_pages_init(void); 15extern void set_highmem_pages_init(void);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 0493be39607c..344eb1790b46 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,23 +2,16 @@
2#define _ASM_X86_NUMA_64_H 2#define _ASM_X86_NUMA_64_H
3 3
4#include <linux/nodemask.h> 4#include <linux/nodemask.h>
5#include <asm/apicdef.h>
6 5
7struct bootnode { 6struct bootnode {
8 u64 start; 7 u64 start;
9 u64 end; 8 u64 end;
10}; 9};
11 10
12extern int compute_hash_shift(struct bootnode *nodes, int numblks,
13 int *nodeids);
14
15#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) 11#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
16 12
17extern void numa_init_array(void);
18extern int numa_off; 13extern int numa_off;
19 14
20extern s16 apicid_to_node[MAX_LOCAL_APIC];
21
22extern unsigned long numa_free_all_bootmem(void); 15extern unsigned long numa_free_all_bootmem(void);
23extern void setup_node_bootmem(int nodeid, unsigned long start, 16extern void setup_node_bootmem(int nodeid, unsigned long start,
24 unsigned long end); 17 unsigned long end);
@@ -31,11 +24,11 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
31 */ 24 */
32#define NODE_MIN_SIZE (4*1024*1024) 25#define NODE_MIN_SIZE (4*1024*1024)
33 26
34extern void __init init_cpu_to_node(void); 27extern nodemask_t numa_nodes_parsed __initdata;
35extern void __cpuinit numa_set_node(int cpu, int node); 28
36extern void __cpuinit numa_clear_node(int cpu); 29extern int __cpuinit numa_cpu_node(int cpu);
37extern void __cpuinit numa_add_cpu(int cpu); 30extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
38extern void __cpuinit numa_remove_cpu(int cpu); 31extern void __init numa_set_distance(int from, int to, int distance);
39 32
40#ifdef CONFIG_NUMA_EMU 33#ifdef CONFIG_NUMA_EMU
41#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) 34#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
@@ -43,11 +36,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
43void numa_emu_cmdline(char *); 36void numa_emu_cmdline(char *);
44#endif /* CONFIG_NUMA_EMU */ 37#endif /* CONFIG_NUMA_EMU */
45#else 38#else
46static inline void init_cpu_to_node(void) { } 39static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
47static inline void numa_set_node(int cpu, int node) { }
48static inline void numa_clear_node(int cpu) { }
49static inline void numa_add_cpu(int cpu, int node) { }
50static inline void numa_remove_cpu(int cpu) { }
51#endif 40#endif
52 41
53#endif /* _ASM_X86_NUMA_64_H */ 42#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 1df66211fd1b..bce688d54c12 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_PAGE_DEFS_H 2#define _ASM_X86_PAGE_DEFS_H
3 3
4#include <linux/const.h> 4#include <linux/const.h>
5#include <linux/types.h>
5 6
6/* PAGE_SHIFT determines the page size */ 7/* PAGE_SHIFT determines the page size */
7#define PAGE_SHIFT 12 8#define PAGE_SHIFT 12
@@ -45,11 +46,15 @@ extern int devmem_is_allowed(unsigned long pagenr);
45extern unsigned long max_low_pfn_mapped; 46extern unsigned long max_low_pfn_mapped;
46extern unsigned long max_pfn_mapped; 47extern unsigned long max_pfn_mapped;
47 48
49static inline phys_addr_t get_max_mapped(void)
50{
51 return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
52}
53
48extern unsigned long init_memory_mapping(unsigned long start, 54extern unsigned long init_memory_mapping(unsigned long start,
49 unsigned long end); 55 unsigned long end);
50 56
51extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn, 57extern void initmem_init(void);
52 int acpi, int k8);
53extern void free_initmem(void); 58extern void free_initmem(void);
54 59
55#endif /* !__ASSEMBLY__ */ 60#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index e2f6a99f14ab..cc29086e30cd 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -22,6 +22,7 @@
22 22
23#define ARCH_P4_CNTRVAL_BITS (40) 23#define ARCH_P4_CNTRVAL_BITS (40)
24#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) 24#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
25#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
25 26
26#define P4_ESCR_EVENT_MASK 0x7e000000U 27#define P4_ESCR_EVENT_MASK 0x7e000000U
27#define P4_ESCR_EVENT_SHIFT 25 28#define P4_ESCR_EVENT_SHIFT 25
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636cefa186..4c25ab48257b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,10 +94,6 @@ struct cpuinfo_x86 {
94 int x86_cache_alignment; /* In bytes */ 94 int x86_cache_alignment; /* In bytes */
95 int x86_power; 95 int x86_power;
96 unsigned long loops_per_jiffy; 96 unsigned long loops_per_jiffy;
97#ifdef CONFIG_SMP
98 /* cpus sharing the last level cache: */
99 cpumask_var_t llc_shared_map;
100#endif
101 /* cpuid returned max cores value: */ 97 /* cpuid returned max cores value: */
102 u16 x86_max_cores; 98 u16 x86_max_cores;
103 u16 apicid; 99 u16 apicid;
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b60..df4cd32b4cc6 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
37#endif 37#endif
38 38
39#ifdef __KERNEL__ 39#ifdef __KERNEL__
40
41#include <linux/list.h>
42#include <linux/spinlock.h>
43#include <linux/lockdep.h>
44#include <asm/asm.h> 40#include <asm/asm.h>
45 41
46struct rwsem_waiter;
47
48extern asmregparm struct rw_semaphore *
49 rwsem_down_read_failed(struct rw_semaphore *sem);
50extern asmregparm struct rw_semaphore *
51 rwsem_down_write_failed(struct rw_semaphore *sem);
52extern asmregparm struct rw_semaphore *
53 rwsem_wake(struct rw_semaphore *);
54extern asmregparm struct rw_semaphore *
55 rwsem_downgrade_wake(struct rw_semaphore *sem);
56
57/* 42/*
58 * the semaphore definition
59 *
60 * The bias values and the counter type limits the number of 43 * The bias values and the counter type limits the number of
61 * potential readers/writers to 32767 for 32 bits and 2147483647 44 * potential readers/writers to 32767 for 32 bits and 2147483647
62 * for 64 bits. 45 * for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
74#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 57#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
75#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 58#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
76 59
77typedef signed long rwsem_count_t;
78
79struct rw_semaphore {
80 rwsem_count_t count;
81 spinlock_t wait_lock;
82 struct list_head wait_list;
83#ifdef CONFIG_DEBUG_LOCK_ALLOC
84 struct lockdep_map dep_map;
85#endif
86};
87
88#ifdef CONFIG_DEBUG_LOCK_ALLOC
89# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
90#else
91# define __RWSEM_DEP_MAP_INIT(lockname)
92#endif
93
94
95#define __RWSEM_INITIALIZER(name) \
96{ \
97 RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
98 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
99}
100
101#define DECLARE_RWSEM(name) \
102 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
103
104extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
105 struct lock_class_key *key);
106
107#define init_rwsem(sem) \
108do { \
109 static struct lock_class_key __key; \
110 \
111 __init_rwsem((sem), #sem, &__key); \
112} while (0)
113
114/* 60/*
115 * lock for reading 61 * lock for reading
116 */ 62 */
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
133 */ 79 */
134static inline int __down_read_trylock(struct rw_semaphore *sem) 80static inline int __down_read_trylock(struct rw_semaphore *sem)
135{ 81{
136 rwsem_count_t result, tmp; 82 long result, tmp;
137 asm volatile("# beginning __down_read_trylock\n\t" 83 asm volatile("# beginning __down_read_trylock\n\t"
138 " mov %0,%1\n\t" 84 " mov %0,%1\n\t"
139 "1:\n\t" 85 "1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
155 */ 101 */
156static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) 102static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
157{ 103{
158 rwsem_count_t tmp; 104 long tmp;
159 asm volatile("# beginning down_write\n\t" 105 asm volatile("# beginning down_write\n\t"
160 LOCK_PREFIX " xadd %1,(%2)\n\t" 106 LOCK_PREFIX " xadd %1,(%2)\n\t"
161 /* adds 0xffff0001, returns the old value */ 107 /* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
180 */ 126 */
181static inline int __down_write_trylock(struct rw_semaphore *sem) 127static inline int __down_write_trylock(struct rw_semaphore *sem)
182{ 128{
183 rwsem_count_t ret = cmpxchg(&sem->count, 129 long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
184 RWSEM_UNLOCKED_VALUE, 130 RWSEM_ACTIVE_WRITE_BIAS);
185 RWSEM_ACTIVE_WRITE_BIAS);
186 if (ret == RWSEM_UNLOCKED_VALUE) 131 if (ret == RWSEM_UNLOCKED_VALUE)
187 return 1; 132 return 1;
188 return 0; 133 return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
193 */ 138 */
194static inline void __up_read(struct rw_semaphore *sem) 139static inline void __up_read(struct rw_semaphore *sem)
195{ 140{
196 rwsem_count_t tmp; 141 long tmp;
197 asm volatile("# beginning __up_read\n\t" 142 asm volatile("# beginning __up_read\n\t"
198 LOCK_PREFIX " xadd %1,(%2)\n\t" 143 LOCK_PREFIX " xadd %1,(%2)\n\t"
199 /* subtracts 1, returns the old value */ 144 /* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
211 */ 156 */
212static inline void __up_write(struct rw_semaphore *sem) 157static inline void __up_write(struct rw_semaphore *sem)
213{ 158{
214 rwsem_count_t tmp; 159 long tmp;
215 asm volatile("# beginning __up_write\n\t" 160 asm volatile("# beginning __up_write\n\t"
216 LOCK_PREFIX " xadd %1,(%2)\n\t" 161 LOCK_PREFIX " xadd %1,(%2)\n\t"
217 /* subtracts 0xffff0001, returns the old value */ 162 /* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
247/* 192/*
248 * implement atomic add functionality 193 * implement atomic add functionality
249 */ 194 */
250static inline void rwsem_atomic_add(rwsem_count_t delta, 195static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
251 struct rw_semaphore *sem)
252{ 196{
253 asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" 197 asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
254 : "+m" (sem->count) 198 : "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
258/* 202/*
259 * implement exchange and add functionality 203 * implement exchange and add functionality
260 */ 204 */
261static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, 205static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
262 struct rw_semaphore *sem)
263{ 206{
264 rwsem_count_t tmp = delta; 207 long tmp = delta;
265 208
266 asm volatile(LOCK_PREFIX "xadd %0,%1" 209 asm volatile(LOCK_PREFIX "xadd %0,%1"
267 : "+r" (tmp), "+m" (sem->count) 210 : "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
270 return tmp + delta; 213 return tmp + delta;
271} 214}
272 215
273static inline int rwsem_is_locked(struct rw_semaphore *sem)
274{
275 return (sem->count != 0);
276}
277
278#endif /* __KERNEL__ */ 216#endif /* __KERNEL__ */
279#endif /* _ASM_X86_RWSEM_H */ 217#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1f4695136776..73b11bc0ae6f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -17,12 +17,24 @@
17#endif 17#endif
18#include <asm/thread_info.h> 18#include <asm/thread_info.h>
19#include <asm/cpumask.h> 19#include <asm/cpumask.h>
20#include <asm/cpufeature.h>
20 21
21extern int smp_num_siblings; 22extern int smp_num_siblings;
22extern unsigned int num_processors; 23extern unsigned int num_processors;
23 24
25static inline bool cpu_has_ht_siblings(void)
26{
27 bool has_siblings = false;
28#ifdef CONFIG_SMP
29 has_siblings = cpu_has_ht && smp_num_siblings > 1;
30#endif
31 return has_siblings;
32}
33
24DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); 34DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
25DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); 35DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
36/* cpus sharing the last level cache: */
37DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
26DECLARE_PER_CPU(u16, cpu_llc_id); 38DECLARE_PER_CPU(u16, cpu_llc_id);
27DECLARE_PER_CPU(int, cpu_number); 39DECLARE_PER_CPU(int, cpu_number);
28 40
@@ -36,8 +48,16 @@ static inline struct cpumask *cpu_core_mask(int cpu)
36 return per_cpu(cpu_core_map, cpu); 48 return per_cpu(cpu_core_map, cpu);
37} 49}
38 50
51static inline struct cpumask *cpu_llc_shared_mask(int cpu)
52{
53 return per_cpu(cpu_llc_shared_map, cpu);
54}
55
39DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); 56DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
40DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); 57DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
58#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
59DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
60#endif
41 61
42/* Static state in head.S used to set up a CPU */ 62/* Static state in head.S used to set up a CPU */
43extern unsigned long stack_start; /* Initial stack pointer address */ 63extern unsigned long stack_start; /* Initial stack pointer address */
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 6c22bf353f26..725b77831993 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
34 */ 34 */
35 CMOS_WRITE(0, 0xf); 35 CMOS_WRITE(0, 0xf);
36 36
37 *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0; 37 *((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0;
38} 38}
39 39
40static inline void __init smpboot_setup_io_apic(void) 40static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 33ecc3ea8782..12569e691ce3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -98,8 +98,6 @@ do { \
98 */ 98 */
99#define HAVE_DISABLE_HLT 99#define HAVE_DISABLE_HLT
100#else 100#else
101#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
102#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
103 101
104/* frame pointer must be last for get_wchan */ 102/* frame pointer must be last for get_wchan */
105#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" 103#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21899cc31e52..910a7084f7f2 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -47,21 +47,6 @@
47 47
48#include <asm/mpspec.h> 48#include <asm/mpspec.h>
49 49
50#ifdef CONFIG_X86_32
51
52/* Mappings between logical cpu number and node number */
53extern int cpu_to_node_map[];
54
55/* Returns the number of the node containing CPU 'cpu' */
56static inline int __cpu_to_node(int cpu)
57{
58 return cpu_to_node_map[cpu];
59}
60#define early_cpu_to_node __cpu_to_node
61#define cpu_to_node __cpu_to_node
62
63#else /* CONFIG_X86_64 */
64
65/* Mappings between logical cpu number and node number */ 50/* Mappings between logical cpu number and node number */
66DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); 51DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
67 52
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu)
84 69
85#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ 70#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
86 71
87#endif /* CONFIG_X86_64 */
88
89/* Mappings between node number and cpus on that node. */ 72/* Mappings between node number and cpus on that node. */
90extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; 73extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
91 74
@@ -155,7 +138,7 @@ extern unsigned long node_remap_size[];
155 .balance_interval = 1, \ 138 .balance_interval = 1, \
156} 139}
157 140
158#ifdef CONFIG_X86_64_ACPI_NUMA 141#ifdef CONFIG_X86_64
159extern int __node_distance(int, int); 142extern int __node_distance(int, int);
160#define node_distance(a, b) __node_distance(a, b) 143#define node_distance(a, b) __node_distance(a, b)
161#endif 144#endif
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0e..ffaf183c619a 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,13 @@
346#define __NR_fanotify_init 338 346#define __NR_fanotify_init 338
347#define __NR_fanotify_mark 339 347#define __NR_fanotify_mark 339
348#define __NR_prlimit64 340 348#define __NR_prlimit64 340
349#define __NR_name_to_handle_at 341
350#define __NR_open_by_handle_at 342
351#define __NR_clock_adjtime 343
349 352
350#ifdef __KERNEL__ 353#ifdef __KERNEL__
351 354
352#define NR_syscalls 341 355#define NR_syscalls 344
353 356
354#define __ARCH_WANT_IPC_PARSE_VERSION 357#define __ARCH_WANT_IPC_PARSE_VERSION
355#define __ARCH_WANT_OLD_READDIR 358#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715b..5466bea670e7 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,12 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
669__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 669__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
670#define __NR_prlimit64 302 670#define __NR_prlimit64 302
671__SYSCALL(__NR_prlimit64, sys_prlimit64) 671__SYSCALL(__NR_prlimit64, sys_prlimit64)
672#define __NR_name_to_handle_at 303
673__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
674#define __NR_open_by_handle_at 304
675__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
676#define __NR_clock_adjtime 305
677__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
672 678
673#ifndef __NO_STUBS 679#ifndef __NO_STUBS
674#define __ARCH_WANT_OLD_READDIR 680#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index ce1d54c8a433..3e094af443c3 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -176,7 +176,7 @@ struct bau_msg_payload {
176struct bau_msg_header { 176struct bau_msg_header {
177 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ 177 unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
178 /* bits 5:0 */ 178 /* bits 5:0 */
179 unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ 179 unsigned int base_dest_nodeid:15; /* nasid of the */
180 /* bits 20:6 */ /* first bit in uvhub map */ 180 /* bits 20:6 */ /* first bit in uvhub map */
181 unsigned int command:8; /* message type */ 181 unsigned int command:8; /* message type */
182 /* bits 28:21 */ 182 /* bits 28:21 */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae4025b..8508bfe52296 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
287static inline int 287static inline int
288HYPERVISOR_sched_op(int cmd, void *arg) 288HYPERVISOR_sched_op(int cmd, void *arg)
289{ 289{
290 return _hypercall2(int, sched_op_new, cmd, arg); 290 return _hypercall2(int, sched_op, cmd, arg);
291} 291}
292 292
293static inline long 293static inline long
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
422#endif 422#endif
423 423
424static inline int 424static inline int
425HYPERVISOR_suspend(unsigned long srec) 425HYPERVISOR_suspend(unsigned long start_info_mfn)
426{ 426{
427 return _hypercall3(int, sched_op, SCHEDOP_shutdown, 427 struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
428 SHUTDOWN_suspend, srec); 428
429 /*
430 * For a PV guest the tools require that the start_info mfn be
431 * present in rdx/edx when the hypercall is made. Per the
432 * hypercall calling convention this is the third hypercall
433 * argument, which is start_info_mfn here.
434 */
435 return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
429} 436}
430 437
431static inline int 438static inline int
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf238a33..c61934fbf22a 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {
29 29
30/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ 30/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
31#define INVALID_P2M_ENTRY (~0UL) 31#define INVALID_P2M_ENTRY (~0UL)
32#define FOREIGN_FRAME_BIT (1UL<<31) 32#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
33#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
33#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) 34#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
35#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
34 36
35/* Maximum amount of memory we can handle in a domain in pages */ 37/* Maximum amount of memory we can handle in a domain in pages */
36#define MAX_DOMAIN_PAGES \ 38#define MAX_DOMAIN_PAGES \
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order;
41 43
42extern unsigned long get_phys_to_machine(unsigned long pfn); 44extern unsigned long get_phys_to_machine(unsigned long pfn);
43extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); 45extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
46extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
47extern unsigned long set_phys_range_identity(unsigned long pfn_s,
48 unsigned long pfn_e);
44 49
45extern int m2p_add_override(unsigned long mfn, struct page *page); 50extern int m2p_add_override(unsigned long mfn, struct page *page);
46extern int m2p_remove_override(struct page *page); 51extern int m2p_remove_override(struct page *page);
47extern struct page *m2p_find_override(unsigned long mfn); 52extern struct page *m2p_find_override(unsigned long mfn);
48extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); 53extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
49 54
55#ifdef CONFIG_XEN_DEBUG_FS
56extern int p2m_dump_show(struct seq_file *m, void *v);
57#endif
50static inline unsigned long pfn_to_mfn(unsigned long pfn) 58static inline unsigned long pfn_to_mfn(unsigned long pfn)
51{ 59{
52 unsigned long mfn; 60 unsigned long mfn;
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
57 mfn = get_phys_to_machine(pfn); 65 mfn = get_phys_to_machine(pfn);
58 66
59 if (mfn != INVALID_P2M_ENTRY) 67 if (mfn != INVALID_P2M_ENTRY)
60 mfn &= ~FOREIGN_FRAME_BIT; 68 mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
61 69
62 return mfn; 70 return mfn;
63} 71}
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
73static inline unsigned long mfn_to_pfn(unsigned long mfn) 81static inline unsigned long mfn_to_pfn(unsigned long mfn)
74{ 82{
75 unsigned long pfn; 83 unsigned long pfn;
84 int ret = 0;
76 85
77 if (xen_feature(XENFEAT_auto_translated_physmap)) 86 if (xen_feature(XENFEAT_auto_translated_physmap))
78 return mfn; 87 return mfn;
79 88
89 if (unlikely((mfn >> machine_to_phys_order) != 0)) {
90 pfn = ~0;
91 goto try_override;
92 }
80 pfn = 0; 93 pfn = 0;
81 /* 94 /*
82 * The array access can fail (e.g., device space beyond end of RAM). 95 * The array access can fail (e.g., device space beyond end of RAM).
83 * In such cases it doesn't matter what we return (we return garbage), 96 * In such cases it doesn't matter what we return (we return garbage),
84 * but we must handle the fault without crashing! 97 * but we must handle the fault without crashing!
85 */ 98 */
86 __get_user(pfn, &machine_to_phys_mapping[mfn]); 99 ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
87 100try_override:
88 /* 101 /* ret might be < 0 if there are no entries in the m2p for mfn */
89 * If this appears to be a foreign mfn (because the pfn 102 if (ret < 0)
90 * doesn't map back to the mfn), then check the local override 103 pfn = ~0;
91 * table to see if there's a better pfn to use. 104 else if (get_phys_to_machine(pfn) != mfn)
105 /*
106 * If this appears to be a foreign mfn (because the pfn
107 * doesn't map back to the mfn), then check the local override
108 * table to see if there's a better pfn to use.
109 *
110 * m2p_find_override_pfn returns ~0 if it doesn't find anything.
111 */
112 pfn = m2p_find_override_pfn(mfn, ~0);
113
114 /*
115 * pfn is ~0 if there are no entries in the m2p for mfn or if the
116 * entry doesn't map back to the mfn and m2p_override doesn't have a
117 * valid entry for it.
92 */ 118 */
93 if (get_phys_to_machine(pfn) != mfn) 119 if (pfn == ~0 &&
94 pfn = m2p_find_override_pfn(mfn, pfn); 120 get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
121 pfn = mfn;
95 122
96 return pfn; 123 return pfn;
97} 124}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3eaf8d3..aa8620989162 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void)
27 * its own functions. 27 * its own functions.
28 */ 28 */
29struct xen_pci_frontend_ops { 29struct xen_pci_frontend_ops {
30 int (*enable_msi)(struct pci_dev *dev, int **vectors); 30 int (*enable_msi)(struct pci_dev *dev, int vectors[]);
31 void (*disable_msi)(struct pci_dev *dev); 31 void (*disable_msi)(struct pci_dev *dev);
32 int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); 32 int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
33 void (*disable_msix)(struct pci_dev *dev); 33 void (*disable_msix)(struct pci_dev *dev);
34}; 34};
35 35
36extern struct xen_pci_frontend_ops *xen_pci_frontend; 36extern struct xen_pci_frontend_ops *xen_pci_frontend;
37 37
38static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, 38static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
39 int **vectors) 39 int vectors[])
40{ 40{
41 if (xen_pci_frontend && xen_pci_frontend->enable_msi) 41 if (xen_pci_frontend && xen_pci_frontend->enable_msi)
42 return xen_pci_frontend->enable_msi(dev, vectors); 42 return xen_pci_frontend->enable_msi(dev, vectors);
@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
48 xen_pci_frontend->disable_msi(dev); 48 xen_pci_frontend->disable_msi(dev);
49} 49}
50static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, 50static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
51 int **vectors, int nvec) 51 int vectors[], int nvec)
52{ 52{
53 if (xen_pci_frontend && xen_pci_frontend->enable_msix) 53 if (xen_pci_frontend && xen_pci_frontend->enable_msix)
54 return xen_pci_frontend->enable_msix(dev, vectors, nvec); 54 return xen_pci_frontend->enable_msix(dev, vectors, nvec);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a71137983a..9a966c579af5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,6 +72,7 @@ u8 acpi_sci_flags __initdata;
72int acpi_sci_override_gsi __initdata; 72int acpi_sci_override_gsi __initdata;
73int acpi_skip_timer_override __initdata; 73int acpi_skip_timer_override __initdata;
74int acpi_use_timer_override __initdata; 74int acpi_use_timer_override __initdata;
75int acpi_fix_pin2_polarity __initdata;
75 76
76#ifdef CONFIG_X86_LOCAL_APIC 77#ifdef CONFIG_X86_LOCAL_APIC
77static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 78static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -415,10 +416,15 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
415 return 0; 416 return 0;
416 } 417 }
417 418
418 if (acpi_skip_timer_override && 419 if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
419 intsrc->source_irq == 0 && intsrc->global_irq == 2) { 420 if (acpi_skip_timer_override) {
420 printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); 421 printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
421 return 0; 422 return 0;
423 }
424 if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
425 intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
426 printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
427 }
422 } 428 }
423 429
424 mp_override_legacy_irq(intsrc->source_irq, 430 mp_override_legacy_irq(intsrc->source_irq,
@@ -589,14 +595,8 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
589 nid = acpi_get_node(handle); 595 nid = acpi_get_node(handle);
590 if (nid == -1 || !node_online(nid)) 596 if (nid == -1 || !node_online(nid))
591 return; 597 return;
592#ifdef CONFIG_X86_64 598 set_apicid_to_node(physid, nid);
593 apicid_to_node[physid] = nid;
594 numa_set_node(cpu, nid); 599 numa_set_node(cpu, nid);
595#else /* CONFIG_X86_32 */
596 apicid_2_node[physid] = nid;
597 cpu_to_node_map[cpu] = nid;
598#endif
599
600#endif 600#endif
601} 601}
602 602
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 0a99f7198bc3..ed3c2e5b714a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,7 +12,7 @@
12 12
13static u32 *flush_words; 13static u32 *flush_words;
14 14
15struct pci_device_id amd_nb_misc_ids[] = { 15const struct pci_device_id amd_nb_misc_ids[] = {
16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, 18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
@@ -20,6 +20,11 @@ struct pci_device_id amd_nb_misc_ids[] = {
20}; 20};
21EXPORT_SYMBOL(amd_nb_misc_ids); 21EXPORT_SYMBOL(amd_nb_misc_ids);
22 22
23static struct pci_device_id amd_nb_link_ids[] = {
24 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
25 {}
26};
27
23const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { 28const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
24 { 0x00, 0x18, 0x20 }, 29 { 0x00, 0x18, 0x20 },
25 { 0xff, 0x00, 0x20 }, 30 { 0xff, 0x00, 0x20 },
@@ -31,7 +36,7 @@ struct amd_northbridge_info amd_northbridges;
31EXPORT_SYMBOL(amd_northbridges); 36EXPORT_SYMBOL(amd_northbridges);
32 37
33static struct pci_dev *next_northbridge(struct pci_dev *dev, 38static struct pci_dev *next_northbridge(struct pci_dev *dev,
34 struct pci_device_id *ids) 39 const struct pci_device_id *ids)
35{ 40{
36 do { 41 do {
37 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); 42 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
@@ -45,7 +50,7 @@ int amd_cache_northbridges(void)
45{ 50{
46 int i = 0; 51 int i = 0;
47 struct amd_northbridge *nb; 52 struct amd_northbridge *nb;
48 struct pci_dev *misc; 53 struct pci_dev *misc, *link;
49 54
50 if (amd_nb_num()) 55 if (amd_nb_num())
51 return 0; 56 return 0;
@@ -64,10 +69,12 @@ int amd_cache_northbridges(void)
64 amd_northbridges.nb = nb; 69 amd_northbridges.nb = nb;
65 amd_northbridges.num = i; 70 amd_northbridges.num = i;
66 71
67 misc = NULL; 72 link = misc = NULL;
68 for (i = 0; i != amd_nb_num(); i++) { 73 for (i = 0; i != amd_nb_num(); i++) {
69 node_to_amd_nb(i)->misc = misc = 74 node_to_amd_nb(i)->misc = misc =
70 next_northbridge(misc, amd_nb_misc_ids); 75 next_northbridge(misc, amd_nb_misc_ids);
76 node_to_amd_nb(i)->link = link =
77 next_northbridge(link, amd_nb_link_ids);
71 } 78 }
72 79
73 /* some CPU families (e.g. family 0x11) do not support GART */ 80 /* some CPU families (e.g. family 0x11) do not support GART */
@@ -85,6 +92,13 @@ int amd_cache_northbridges(void)
85 boot_cpu_data.x86_mask >= 0x1)) 92 boot_cpu_data.x86_mask >= 0x1))
86 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; 93 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
87 94
95 if (boot_cpu_data.x86 == 0x15)
96 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
97
98 /* L3 cache partitioning is supported on family 0x15 */
99 if (boot_cpu_data.x86 == 0x15)
100 amd_northbridges.flags |= AMD_NB_L3_PARTITIONING;
101
88 return 0; 102 return 0;
89} 103}
90EXPORT_SYMBOL_GPL(amd_cache_northbridges); 104EXPORT_SYMBOL_GPL(amd_cache_northbridges);
@@ -93,8 +107,9 @@ EXPORT_SYMBOL_GPL(amd_cache_northbridges);
93 they're useless anyways */ 107 they're useless anyways */
94int __init early_is_amd_nb(u32 device) 108int __init early_is_amd_nb(u32 device)
95{ 109{
96 struct pci_device_id *id; 110 const struct pci_device_id *id;
97 u32 vendor = device & 0xffff; 111 u32 vendor = device & 0xffff;
112
98 device >>= 16; 113 device >>= 16;
99 for (id = amd_nb_misc_ids; id->vendor; id++) 114 for (id = amd_nb_misc_ids; id->vendor; id++)
100 if (vendor == id->vendor && device == id->device) 115 if (vendor == id->vendor && device == id->device)
@@ -102,6 +117,65 @@ int __init early_is_amd_nb(u32 device)
102 return 0; 117 return 0;
103} 118}
104 119
120int amd_get_subcaches(int cpu)
121{
122 struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
123 unsigned int mask;
124 int cuid = 0;
125
126 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
127 return 0;
128
129 pci_read_config_dword(link, 0x1d4, &mask);
130
131#ifdef CONFIG_SMP
132 cuid = cpu_data(cpu).compute_unit_id;
133#endif
134 return (mask >> (4 * cuid)) & 0xf;
135}
136
137int amd_set_subcaches(int cpu, int mask)
138{
139 static unsigned int reset, ban;
140 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
141 unsigned int reg;
142 int cuid = 0;
143
144 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
145 return -EINVAL;
146
147 /* if necessary, collect reset state of L3 partitioning and BAN mode */
148 if (reset == 0) {
149 pci_read_config_dword(nb->link, 0x1d4, &reset);
150 pci_read_config_dword(nb->misc, 0x1b8, &ban);
151 ban &= 0x180000;
152 }
153
154 /* deactivate BAN mode if any subcaches are to be disabled */
155 if (mask != 0xf) {
156 pci_read_config_dword(nb->misc, 0x1b8, &reg);
157 pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
158 }
159
160#ifdef CONFIG_SMP
161 cuid = cpu_data(cpu).compute_unit_id;
162#endif
163 mask <<= 4 * cuid;
164 mask |= (0xf ^ (1 << cuid)) << 26;
165
166 pci_write_config_dword(nb->link, 0x1d4, mask);
167
168 /* reset BAN mode if L3 partitioning returned to reset state */
169 pci_read_config_dword(nb->link, 0x1d4, &reg);
170 if (reg == reset) {
171 pci_read_config_dword(nb->misc, 0x1b8, &reg);
172 reg &= ~0x180000;
173 pci_write_config_dword(nb->misc, 0x1b8, reg | ban);
174 }
175
176 return 0;
177}
178
105int amd_cache_gart(void) 179int amd_cache_gart(void)
106{ 180{
107 int i; 181 int i;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 671d5aad7a0c..1293c709ee85 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -284,7 +284,7 @@ static int __init apbt_clockevent_register(void)
284 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); 284 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
285 285
286 if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) { 286 if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
287 apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; 287 adev->evt.rating = APBT_CLOCKEVENT_RATING - 100;
288 global_clock_event = &adev->evt; 288 global_clock_event = &adev->evt;
289 printk(KERN_DEBUG "%s clockevent registered as global\n", 289 printk(KERN_DEBUG "%s clockevent registered as global\n",
290 global_clock_event->name); 290 global_clock_event->name);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5955a7800a96..7b1e8e10b89c 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -13,7 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/types.h> 14#include <linux/types.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/bootmem.h> 16#include <linux/memblock.h>
17#include <linux/mmzone.h> 17#include <linux/mmzone.h>
18#include <linux/pci_ids.h> 18#include <linux/pci_ids.h>
19#include <linux/pci.h> 19#include <linux/pci.h>
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
57static u32 __init allocate_aperture(void) 57static u32 __init allocate_aperture(void)
58{ 58{
59 u32 aper_size; 59 u32 aper_size;
60 void *p; 60 unsigned long addr;
61 61
62 /* aper_size should <= 1G */ 62 /* aper_size should <= 1G */
63 if (fallback_aper_order > 5) 63 if (fallback_aper_order > 5)
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void)
83 * so don't use 512M below as gart iommu, leave the space for kernel 83 * so don't use 512M below as gart iommu, leave the space for kernel
84 * code for safe 84 * code for safe
85 */ 85 */
86 p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); 86 addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
87 if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
88 printk(KERN_ERR
89 "Cannot allocate aperture memory hole (%lx,%uK)\n",
90 addr, aper_size>>10);
91 return 0;
92 }
93 memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
87 /* 94 /*
88 * Kmemleak should not scan this block as it may not be mapped via the 95 * Kmemleak should not scan this block as it may not be mapped via the
89 * kernel direct mapping. 96 * kernel direct mapping.
90 */ 97 */
91 kmemleak_ignore(p); 98 kmemleak_ignore(phys_to_virt(addr));
92 if (!p || __pa(p)+aper_size > 0xffffffff) {
93 printk(KERN_ERR
94 "Cannot allocate aperture memory hole (%p,%uK)\n",
95 p, aper_size>>10);
96 if (p)
97 free_bootmem(__pa(p), aper_size);
98 return 0;
99 }
100 printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", 99 printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
101 aper_size >> 10, __pa(p)); 100 aper_size >> 10, addr);
102 insert_aperture_resource((u32)__pa(p), aper_size); 101 insert_aperture_resource((u32)addr, aper_size);
103 register_nosave_region((u32)__pa(p) >> PAGE_SHIFT, 102 register_nosave_region(addr >> PAGE_SHIFT,
104 (u32)__pa(p+aper_size) >> PAGE_SHIFT); 103 (addr+aper_size) >> PAGE_SHIFT);
105 104
106 return (u32)__pa(p); 105 return (u32)addr;
107} 106}
108 107
109 108
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ffbf7c21bbc6..966673f44141 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -79,6 +79,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
79EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 79EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
80 80
81#ifdef CONFIG_X86_32 81#ifdef CONFIG_X86_32
82
83/*
84 * On x86_32, the mapping between cpu and logical apicid may vary
85 * depending on apic in use. The following early percpu variable is
86 * used for the mapping. This is where the behaviors of x86_64 and 32
87 * actually diverge. Let's keep it ugly for now.
88 */
89DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
90
82/* 91/*
83 * Knob to control our willingness to enable the local APIC. 92 * Knob to control our willingness to enable the local APIC.
84 * 93 *
@@ -1217,6 +1226,19 @@ void __cpuinit setup_local_APIC(void)
1217 */ 1226 */
1218 apic->init_apic_ldr(); 1227 apic->init_apic_ldr();
1219 1228
1229#ifdef CONFIG_X86_32
1230 /*
1231 * APIC LDR is initialized. If logical_apicid mapping was
1232 * initialized during get_smp_config(), make sure it matches the
1233 * actual value.
1234 */
1235 i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
1236 WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
1237 /* always use the value from LDR */
1238 early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
1239 logical_smp_processor_id();
1240#endif
1241
1220 /* 1242 /*
1221 * Set Task Priority to 'accept all'. We never change this 1243 * Set Task Priority to 'accept all'. We never change this
1222 * later on. 1244 * later on.
@@ -1910,17 +1932,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1910{ 1932{
1911 int cpu; 1933 int cpu;
1912 1934
1913 /*
1914 * Validate version
1915 */
1916 if (version == 0x0) {
1917 pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
1918 "fixing up to 0x10. (tell your hw vendor)\n",
1919 version);
1920 version = 0x10;
1921 }
1922 apic_version[apicid] = version;
1923
1924 if (num_processors >= nr_cpu_ids) { 1935 if (num_processors >= nr_cpu_ids) {
1925 int max = nr_cpu_ids; 1936 int max = nr_cpu_ids;
1926 int thiscpu = max + disabled_cpus; 1937 int thiscpu = max + disabled_cpus;
@@ -1934,22 +1945,34 @@ void __cpuinit generic_processor_info(int apicid, int version)
1934 } 1945 }
1935 1946
1936 num_processors++; 1947 num_processors++;
1937 cpu = cpumask_next_zero(-1, cpu_present_mask);
1938
1939 if (version != apic_version[boot_cpu_physical_apicid])
1940 WARN_ONCE(1,
1941 "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
1942 apic_version[boot_cpu_physical_apicid], cpu, version);
1943
1944 physid_set(apicid, phys_cpu_present_map);
1945 if (apicid == boot_cpu_physical_apicid) { 1948 if (apicid == boot_cpu_physical_apicid) {
1946 /* 1949 /*
1947 * x86_bios_cpu_apicid is required to have processors listed 1950 * x86_bios_cpu_apicid is required to have processors listed
1948 * in same order as logical cpu numbers. Hence the first 1951 * in same order as logical cpu numbers. Hence the first
1949 * entry is BSP, and so on. 1952 * entry is BSP, and so on.
1953 * boot_cpu_init() already hold bit 0 in cpu_present_mask
1954 * for BSP.
1950 */ 1955 */
1951 cpu = 0; 1956 cpu = 0;
1957 } else
1958 cpu = cpumask_next_zero(-1, cpu_present_mask);
1959
1960 /*
1961 * Validate version
1962 */
1963 if (version == 0x0) {
1964 pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
1965 cpu, apicid);
1966 version = 0x10;
1952 } 1967 }
1968 apic_version[apicid] = version;
1969
1970 if (version != apic_version[boot_cpu_physical_apicid]) {
1971 pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
1972 apic_version[boot_cpu_physical_apicid], cpu, version);
1973 }
1974
1975 physid_set(apicid, phys_cpu_present_map);
1953 if (apicid > max_physical_apicid) 1976 if (apicid > max_physical_apicid)
1954 max_physical_apicid = apicid; 1977 max_physical_apicid = apicid;
1955 1978
@@ -1957,7 +1980,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
1957 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1980 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1958 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1981 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1959#endif 1982#endif
1960 1983#ifdef CONFIG_X86_32
1984 early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
1985 apic->x86_32_early_logical_apicid(cpu);
1986#endif
1961 set_cpu_possible(cpu, true); 1987 set_cpu_possible(cpu, true);
1962 set_cpu_present(cpu, true); 1988 set_cpu_present(cpu, true);
1963} 1989}
@@ -1978,10 +2004,14 @@ void default_init_apic_ldr(void)
1978} 2004}
1979 2005
1980#ifdef CONFIG_X86_32 2006#ifdef CONFIG_X86_32
1981int default_apicid_to_node(int logical_apicid) 2007int default_x86_32_numa_cpu_node(int cpu)
1982{ 2008{
1983#ifdef CONFIG_SMP 2009#ifdef CONFIG_NUMA
1984 return apicid_2_node[hard_smp_processor_id()]; 2010 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
2011
2012 if (apicid != BAD_APICID)
2013 return __apicid_to_node[apicid];
2014 return NUMA_NO_NODE;
1985#else 2015#else
1986 return 0; 2016 return 0;
1987#endif 2017#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17ce0c2..5652d31fe108 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,8 +185,6 @@ struct apic apic_flat = {
185 .ioapic_phys_id_map = NULL, 185 .ioapic_phys_id_map = NULL,
186 .setup_apic_routing = NULL, 186 .setup_apic_routing = NULL,
187 .multi_timer_check = NULL, 187 .multi_timer_check = NULL,
188 .apicid_to_node = NULL,
189 .cpu_to_logical_apicid = NULL,
190 .cpu_present_to_apicid = default_cpu_present_to_apicid, 188 .cpu_present_to_apicid = default_cpu_present_to_apicid,
191 .apicid_to_cpu_present = NULL, 189 .apicid_to_cpu_present = NULL,
192 .setup_portio_remap = NULL, 190 .setup_portio_remap = NULL,
@@ -337,8 +335,6 @@ struct apic apic_physflat = {
337 .ioapic_phys_id_map = NULL, 335 .ioapic_phys_id_map = NULL,
338 .setup_apic_routing = NULL, 336 .setup_apic_routing = NULL,
339 .multi_timer_check = NULL, 337 .multi_timer_check = NULL,
340 .apicid_to_node = NULL,
341 .cpu_to_logical_apicid = NULL,
342 .cpu_present_to_apicid = default_cpu_present_to_apicid, 338 .cpu_present_to_apicid = default_cpu_present_to_apicid,
343 .apicid_to_cpu_present = NULL, 339 .apicid_to_cpu_present = NULL,
344 .setup_portio_remap = NULL, 340 .setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ffe25f5..f1baa2dc087a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
54 return 0; 54 return 0;
55} 55}
56 56
57static int noop_cpu_to_logical_apicid(int cpu)
58{
59 return 0;
60}
61
62static int noop_phys_pkg_id(int cpuid_apic, int index_msb) 57static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
63{ 58{
64 return 0; 59 return 0;
@@ -113,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
113 cpumask_set_cpu(cpu, retmask); 108 cpumask_set_cpu(cpu, retmask);
114} 109}
115 110
116int noop_apicid_to_node(int logical_apicid)
117{
118 /* we're always on node 0 */
119 return 0;
120}
121
122static u32 noop_apic_read(u32 reg) 111static u32 noop_apic_read(u32 reg)
123{ 112{
124 WARN_ON_ONCE((cpu_has_apic && !disable_apic)); 113 WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -130,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v)
130 WARN_ON_ONCE(cpu_has_apic && !disable_apic); 119 WARN_ON_ONCE(cpu_has_apic && !disable_apic);
131} 120}
132 121
122#ifdef CONFIG_X86_32
123static int noop_x86_32_numa_cpu_node(int cpu)
124{
125 /* we're always on node 0 */
126 return 0;
127}
128#endif
129
133struct apic apic_noop = { 130struct apic apic_noop = {
134 .name = "noop", 131 .name = "noop",
135 .probe = noop_probe, 132 .probe = noop_probe,
@@ -153,9 +150,7 @@ struct apic apic_noop = {
153 .ioapic_phys_id_map = default_ioapic_phys_id_map, 150 .ioapic_phys_id_map = default_ioapic_phys_id_map,
154 .setup_apic_routing = NULL, 151 .setup_apic_routing = NULL,
155 .multi_timer_check = NULL, 152 .multi_timer_check = NULL,
156 .apicid_to_node = noop_apicid_to_node,
157 153
158 .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
159 .cpu_present_to_apicid = default_cpu_present_to_apicid, 154 .cpu_present_to_apicid = default_cpu_present_to_apicid,
160 .apicid_to_cpu_present = physid_set_mask_of_physid, 155 .apicid_to_cpu_present = physid_set_mask_of_physid,
161 156
@@ -197,4 +192,9 @@ struct apic apic_noop = {
197 .icr_write = noop_apic_icr_write, 192 .icr_write = noop_apic_icr_write,
198 .wait_icr_idle = noop_apic_wait_icr_idle, 193 .wait_icr_idle = noop_apic_wait_icr_idle,
199 .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, 194 .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
195
196#ifdef CONFIG_X86_32
197 .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
198 .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
199#endif
200}; 200};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5091b9..541a2e431659 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
45 return 1; 45 return 1;
46} 46}
47 47
48static int bigsmp_early_logical_apicid(int cpu)
49{
50 /* on bigsmp, logical apicid is the same as physical */
51 return early_per_cpu(x86_cpu_to_apicid, cpu);
52}
53
48static inline unsigned long calculate_ldr(int cpu) 54static inline unsigned long calculate_ldr(int cpu)
49{ 55{
50 unsigned long val, id; 56 unsigned long val, id;
@@ -80,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
80 nr_ioapics); 86 nr_ioapics);
81} 87}
82 88
83static int bigsmp_apicid_to_node(int logical_apicid)
84{
85 return apicid_2_node[hard_smp_processor_id()];
86}
87
88static int bigsmp_cpu_present_to_apicid(int mps_cpu) 89static int bigsmp_cpu_present_to_apicid(int mps_cpu)
89{ 90{
90 if (mps_cpu < nr_cpu_ids) 91 if (mps_cpu < nr_cpu_ids)
@@ -93,14 +94,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
93 return BAD_APICID; 94 return BAD_APICID;
94} 95}
95 96
96/* Mapping from cpu number to logical apicid */
97static inline int bigsmp_cpu_to_logical_apicid(int cpu)
98{
99 if (cpu >= nr_cpu_ids)
100 return BAD_APICID;
101 return cpu_physical_id(cpu);
102}
103
104static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) 97static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
105{ 98{
106 /* For clustered we don't have a good way to do this yet - hack */ 99 /* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +108,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
115/* As we are using single CPU as destination, pick only one CPU here */ 108/* As we are using single CPU as destination, pick only one CPU here */
116static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) 109static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
117{ 110{
118 return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask)); 111 int cpu = cpumask_first(cpumask);
112
113 if (cpu < nr_cpu_ids)
114 return cpu_physical_id(cpu);
115 return BAD_APICID;
119} 116}
120 117
121static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 118static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +126,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
129 */ 126 */
130 for_each_cpu_and(cpu, cpumask, andmask) { 127 for_each_cpu_and(cpu, cpumask, andmask) {
131 if (cpumask_test_cpu(cpu, cpu_online_mask)) 128 if (cpumask_test_cpu(cpu, cpu_online_mask))
132 break; 129 return cpu_physical_id(cpu);
133 } 130 }
134 return bigsmp_cpu_to_logical_apicid(cpu); 131 return BAD_APICID;
135} 132}
136 133
137static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) 134static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -219,8 +216,6 @@ struct apic apic_bigsmp = {
219 .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, 216 .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
220 .setup_apic_routing = bigsmp_setup_apic_routing, 217 .setup_apic_routing = bigsmp_setup_apic_routing,
221 .multi_timer_check = NULL, 218 .multi_timer_check = NULL,
222 .apicid_to_node = bigsmp_apicid_to_node,
223 .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
224 .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, 219 .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
225 .apicid_to_cpu_present = physid_set_mask_of_physid, 220 .apicid_to_cpu_present = physid_set_mask_of_physid,
226 .setup_portio_remap = NULL, 221 .setup_portio_remap = NULL,
@@ -256,4 +251,7 @@ struct apic apic_bigsmp = {
256 .icr_write = native_apic_icr_write, 251 .icr_write = native_apic_icr_write,
257 .wait_icr_idle = native_apic_wait_icr_idle, 252 .wait_icr_idle = native_apic_wait_icr_idle,
258 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 253 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
254
255 .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
256 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
259}; 257};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582d8022..3e9de4854c5b 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
460 return physid_isset(bit, phys_cpu_present_map); 460 return physid_isset(bit, phys_cpu_present_map);
461} 461}
462 462
463static int es7000_early_logical_apicid(int cpu)
464{
465 /* on es7000, logical apicid is the same as physical */
466 return early_per_cpu(x86_bios_cpu_apicid, cpu);
467}
468
463static unsigned long calculate_ldr(int cpu) 469static unsigned long calculate_ldr(int cpu)
464{ 470{
465 unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); 471 unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -504,12 +510,11 @@ static void es7000_setup_apic_routing(void)
504 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); 510 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
505} 511}
506 512
507static int es7000_apicid_to_node(int logical_apicid) 513static int es7000_numa_cpu_node(int cpu)
508{ 514{
509 return 0; 515 return 0;
510} 516}
511 517
512
513static int es7000_cpu_present_to_apicid(int mps_cpu) 518static int es7000_cpu_present_to_apicid(int mps_cpu)
514{ 519{
515 if (!mps_cpu) 520 if (!mps_cpu)
@@ -528,18 +533,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
528 ++cpu_id; 533 ++cpu_id;
529} 534}
530 535
531/* Mapping from cpu number to logical apicid */
532static int es7000_cpu_to_logical_apicid(int cpu)
533{
534#ifdef CONFIG_SMP
535 if (cpu >= nr_cpu_ids)
536 return BAD_APICID;
537 return cpu_2_logical_apicid[cpu];
538#else
539 return logical_smp_processor_id();
540#endif
541}
542
543static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) 536static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
544{ 537{
545 /* For clustered we don't have a good way to do this yet - hack */ 538 /* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +554,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
561 * The cpus in the mask must all be on the apic cluster. 554 * The cpus in the mask must all be on the apic cluster.
562 */ 555 */
563 for_each_cpu(cpu, cpumask) { 556 for_each_cpu(cpu, cpumask) {
564 int new_apicid = es7000_cpu_to_logical_apicid(cpu); 557 int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
565 558
566 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { 559 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
567 WARN(1, "Not a valid mask!"); 560 WARN(1, "Not a valid mask!");
@@ -578,7 +571,7 @@ static unsigned int
578es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, 571es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
579 const struct cpumask *andmask) 572 const struct cpumask *andmask)
580{ 573{
581 int apicid = es7000_cpu_to_logical_apicid(0); 574 int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
582 cpumask_var_t cpumask; 575 cpumask_var_t cpumask;
583 576
584 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) 577 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -655,8 +648,6 @@ struct apic __refdata apic_es7000_cluster = {
655 .ioapic_phys_id_map = es7000_ioapic_phys_id_map, 648 .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
656 .setup_apic_routing = es7000_setup_apic_routing, 649 .setup_apic_routing = es7000_setup_apic_routing,
657 .multi_timer_check = NULL, 650 .multi_timer_check = NULL,
658 .apicid_to_node = es7000_apicid_to_node,
659 .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
660 .cpu_present_to_apicid = es7000_cpu_present_to_apicid, 651 .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
661 .apicid_to_cpu_present = es7000_apicid_to_cpu_present, 652 .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
662 .setup_portio_remap = NULL, 653 .setup_portio_remap = NULL,
@@ -695,6 +686,9 @@ struct apic __refdata apic_es7000_cluster = {
695 .icr_write = native_apic_icr_write, 686 .icr_write = native_apic_icr_write,
696 .wait_icr_idle = native_apic_wait_icr_idle, 687 .wait_icr_idle = native_apic_wait_icr_idle,
697 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 688 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
689
690 .x86_32_early_logical_apicid = es7000_early_logical_apicid,
691 .x86_32_numa_cpu_node = es7000_numa_cpu_node,
698}; 692};
699 693
700struct apic __refdata apic_es7000 = { 694struct apic __refdata apic_es7000 = {
@@ -720,8 +714,6 @@ struct apic __refdata apic_es7000 = {
720 .ioapic_phys_id_map = es7000_ioapic_phys_id_map, 714 .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
721 .setup_apic_routing = es7000_setup_apic_routing, 715 .setup_apic_routing = es7000_setup_apic_routing,
722 .multi_timer_check = NULL, 716 .multi_timer_check = NULL,
723 .apicid_to_node = es7000_apicid_to_node,
724 .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
725 .cpu_present_to_apicid = es7000_cpu_present_to_apicid, 717 .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
726 .apicid_to_cpu_present = es7000_apicid_to_cpu_present, 718 .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
727 .setup_portio_remap = NULL, 719 .setup_portio_remap = NULL,
@@ -758,4 +750,7 @@ struct apic __refdata apic_es7000 = {
758 .icr_write = native_apic_icr_write, 750 .icr_write = native_apic_icr_write,
759 .wait_icr_idle = native_apic_wait_icr_idle, 751 .wait_icr_idle = native_apic_wait_icr_idle,
760 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 752 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
753
754 .x86_32_early_logical_apicid = es7000_early_logical_apicid,
755 .x86_32_numa_cpu_node = es7000_numa_cpu_node,
761}; 756};
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 79fd43ca6f96..c4e557a1ebb6 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -83,7 +83,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
83 arch_spin_lock(&lock); 83 arch_spin_lock(&lock);
84 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 84 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
85 show_regs(regs); 85 show_regs(regs);
86 dump_stack();
87 arch_spin_unlock(&lock); 86 arch_spin_unlock(&lock);
88 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 87 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
89 return NOTIFY_STOP; 88 return NOTIFY_STOP;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8d23e831a45e..4b5ebd26f565 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -187,7 +187,7 @@ int __init arch_early_irq_init(void)
187 irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs); 187 irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
188 188
189 for (i = 0; i < count; i++) { 189 for (i = 0; i < count; i++) {
190 set_irq_chip_data(i, &cfg[i]); 190 irq_set_chip_data(i, &cfg[i]);
191 zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); 191 zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
192 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); 192 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
193 /* 193 /*
@@ -206,7 +206,7 @@ int __init arch_early_irq_init(void)
206#ifdef CONFIG_SPARSE_IRQ 206#ifdef CONFIG_SPARSE_IRQ
207static struct irq_cfg *irq_cfg(unsigned int irq) 207static struct irq_cfg *irq_cfg(unsigned int irq)
208{ 208{
209 return get_irq_chip_data(irq); 209 return irq_get_chip_data(irq);
210} 210}
211 211
212static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) 212static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
@@ -232,7 +232,7 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
232{ 232{
233 if (!cfg) 233 if (!cfg)
234 return; 234 return;
235 set_irq_chip_data(at, NULL); 235 irq_set_chip_data(at, NULL);
236 free_cpumask_var(cfg->domain); 236 free_cpumask_var(cfg->domain);
237 free_cpumask_var(cfg->old_domain); 237 free_cpumask_var(cfg->old_domain);
238 kfree(cfg); 238 kfree(cfg);
@@ -262,14 +262,14 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
262 if (res < 0) { 262 if (res < 0) {
263 if (res != -EEXIST) 263 if (res != -EEXIST)
264 return NULL; 264 return NULL;
265 cfg = get_irq_chip_data(at); 265 cfg = irq_get_chip_data(at);
266 if (cfg) 266 if (cfg)
267 return cfg; 267 return cfg;
268 } 268 }
269 269
270 cfg = alloc_irq_cfg(at, node); 270 cfg = alloc_irq_cfg(at, node);
271 if (cfg) 271 if (cfg)
272 set_irq_chip_data(at, cfg); 272 irq_set_chip_data(at, cfg);
273 else 273 else
274 irq_free_desc(at); 274 irq_free_desc(at);
275 return cfg; 275 return cfg;
@@ -1185,7 +1185,7 @@ void __setup_vector_irq(int cpu)
1185 raw_spin_lock(&vector_lock); 1185 raw_spin_lock(&vector_lock);
1186 /* Mark the inuse vectors */ 1186 /* Mark the inuse vectors */
1187 for_each_active_irq(irq) { 1187 for_each_active_irq(irq) {
1188 cfg = get_irq_chip_data(irq); 1188 cfg = irq_get_chip_data(irq);
1189 if (!cfg) 1189 if (!cfg)
1190 continue; 1190 continue;
1191 /* 1191 /*
@@ -1240,35 +1240,31 @@ static inline int IO_APIC_irq_trigger(int irq)
1240} 1240}
1241#endif 1241#endif
1242 1242
1243static void ioapic_register_intr(unsigned int irq, unsigned long trigger) 1243static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1244 unsigned long trigger)
1244{ 1245{
1246 struct irq_chip *chip = &ioapic_chip;
1247 irq_flow_handler_t hdl;
1248 bool fasteoi;
1245 1249
1246 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1250 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1247 trigger == IOAPIC_LEVEL) 1251 trigger == IOAPIC_LEVEL) {
1248 irq_set_status_flags(irq, IRQ_LEVEL); 1252 irq_set_status_flags(irq, IRQ_LEVEL);
1249 else 1253 fasteoi = true;
1254 } else {
1250 irq_clear_status_flags(irq, IRQ_LEVEL); 1255 irq_clear_status_flags(irq, IRQ_LEVEL);
1256 fasteoi = false;
1257 }
1251 1258
1252 if (irq_remapped(get_irq_chip_data(irq))) { 1259 if (irq_remapped(cfg)) {
1253 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 1260 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1254 if (trigger) 1261 chip = &ir_ioapic_chip;
1255 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1262 fasteoi = trigger != 0;
1256 handle_fasteoi_irq,
1257 "fasteoi");
1258 else
1259 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1260 handle_edge_irq, "edge");
1261 return;
1262 } 1263 }
1263 1264
1264 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1265 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
1265 trigger == IOAPIC_LEVEL) 1266 irq_set_chip_and_handler_name(irq, chip, hdl,
1266 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1267 fasteoi ? "fasteoi" : "edge");
1267 handle_fasteoi_irq,
1268 "fasteoi");
1269 else
1270 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1271 handle_edge_irq, "edge");
1272} 1268}
1273 1269
1274static int setup_ioapic_entry(int apic_id, int irq, 1270static int setup_ioapic_entry(int apic_id, int irq,
@@ -1366,7 +1362,7 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
1366 return; 1362 return;
1367 } 1363 }
1368 1364
1369 ioapic_register_intr(irq, trigger); 1365 ioapic_register_intr(irq, cfg, trigger);
1370 if (irq < legacy_pic->nr_legacy_irqs) 1366 if (irq < legacy_pic->nr_legacy_irqs)
1371 legacy_pic->mask(irq); 1367 legacy_pic->mask(irq);
1372 1368
@@ -1491,7 +1487,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1491 * The timer IRQ doesn't have to know that behind the 1487 * The timer IRQ doesn't have to know that behind the
1492 * scene we may have a 8259A-master in AEOI mode ... 1488 * scene we may have a 8259A-master in AEOI mode ...
1493 */ 1489 */
1494 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 1490 irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
1491 "edge");
1495 1492
1496 /* 1493 /*
1497 * Add it to the IO-APIC irq-routing table: 1494 * Add it to the IO-APIC irq-routing table:
@@ -1598,7 +1595,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1598 for_each_active_irq(irq) { 1595 for_each_active_irq(irq) {
1599 struct irq_pin_list *entry; 1596 struct irq_pin_list *entry;
1600 1597
1601 cfg = get_irq_chip_data(irq); 1598 cfg = irq_get_chip_data(irq);
1602 if (!cfg) 1599 if (!cfg)
1603 continue; 1600 continue;
1604 entry = cfg->irq_2_pin; 1601 entry = cfg->irq_2_pin;
@@ -2364,7 +2361,7 @@ static void irq_complete_move(struct irq_cfg *cfg)
2364 2361
2365void irq_force_complete_move(int irq) 2362void irq_force_complete_move(int irq)
2366{ 2363{
2367 struct irq_cfg *cfg = get_irq_chip_data(irq); 2364 struct irq_cfg *cfg = irq_get_chip_data(irq);
2368 2365
2369 if (!cfg) 2366 if (!cfg)
2370 return; 2367 return;
@@ -2378,7 +2375,7 @@ static inline void irq_complete_move(struct irq_cfg *cfg) { }
2378static void ack_apic_edge(struct irq_data *data) 2375static void ack_apic_edge(struct irq_data *data)
2379{ 2376{
2380 irq_complete_move(data->chip_data); 2377 irq_complete_move(data->chip_data);
2381 move_native_irq(data->irq); 2378 irq_move_irq(data);
2382 ack_APIC_irq(); 2379 ack_APIC_irq();
2383} 2380}
2384 2381
@@ -2435,7 +2432,7 @@ static void ack_apic_level(struct irq_data *data)
2435 irq_complete_move(cfg); 2432 irq_complete_move(cfg);
2436#ifdef CONFIG_GENERIC_PENDING_IRQ 2433#ifdef CONFIG_GENERIC_PENDING_IRQ
2437 /* If we are moving the irq we need to mask it */ 2434 /* If we are moving the irq we need to mask it */
2438 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { 2435 if (unlikely(irqd_is_setaffinity_pending(data))) {
2439 do_unmask_irq = 1; 2436 do_unmask_irq = 1;
2440 mask_ioapic(cfg); 2437 mask_ioapic(cfg);
2441 } 2438 }
@@ -2524,7 +2521,7 @@ static void ack_apic_level(struct irq_data *data)
2524 * and you can go talk to the chipset vendor about it. 2521 * and you can go talk to the chipset vendor about it.
2525 */ 2522 */
2526 if (!io_apic_level_ack_pending(cfg)) 2523 if (!io_apic_level_ack_pending(cfg))
2527 move_masked_irq(irq); 2524 irq_move_masked_irq(data);
2528 unmask_ioapic(cfg); 2525 unmask_ioapic(cfg);
2529 } 2526 }
2530} 2527}
@@ -2587,7 +2584,7 @@ static inline void init_IO_APIC_traps(void)
2587 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2584 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2588 */ 2585 */
2589 for_each_active_irq(irq) { 2586 for_each_active_irq(irq) {
2590 cfg = get_irq_chip_data(irq); 2587 cfg = irq_get_chip_data(irq);
2591 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { 2588 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2592 /* 2589 /*
2593 * Hmm.. We don't have an entry for this, 2590 * Hmm.. We don't have an entry for this,
@@ -2598,7 +2595,7 @@ static inline void init_IO_APIC_traps(void)
2598 legacy_pic->make_irq(irq); 2595 legacy_pic->make_irq(irq);
2599 else 2596 else
2600 /* Strange. Oh, well.. */ 2597 /* Strange. Oh, well.. */
2601 set_irq_chip(irq, &no_irq_chip); 2598 irq_set_chip(irq, &no_irq_chip);
2602 } 2599 }
2603 } 2600 }
2604} 2601}
@@ -2638,7 +2635,7 @@ static struct irq_chip lapic_chip __read_mostly = {
2638static void lapic_register_intr(int irq) 2635static void lapic_register_intr(int irq)
2639{ 2636{
2640 irq_clear_status_flags(irq, IRQ_LEVEL); 2637 irq_clear_status_flags(irq, IRQ_LEVEL);
2641 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2638 irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2642 "edge"); 2639 "edge");
2643} 2640}
2644 2641
@@ -2722,7 +2719,7 @@ int timer_through_8259 __initdata;
2722 */ 2719 */
2723static inline void __init check_timer(void) 2720static inline void __init check_timer(void)
2724{ 2721{
2725 struct irq_cfg *cfg = get_irq_chip_data(0); 2722 struct irq_cfg *cfg = irq_get_chip_data(0);
2726 int node = cpu_to_node(0); 2723 int node = cpu_to_node(0);
2727 int apic1, pin1, apic2, pin2; 2724 int apic1, pin1, apic2, pin2;
2728 unsigned long flags; 2725 unsigned long flags;
@@ -3033,7 +3030,7 @@ unsigned int create_irq_nr(unsigned int from, int node)
3033 raw_spin_unlock_irqrestore(&vector_lock, flags); 3030 raw_spin_unlock_irqrestore(&vector_lock, flags);
3034 3031
3035 if (ret) { 3032 if (ret) {
3036 set_irq_chip_data(irq, cfg); 3033 irq_set_chip_data(irq, cfg);
3037 irq_clear_status_flags(irq, IRQ_NOREQUEST); 3034 irq_clear_status_flags(irq, IRQ_NOREQUEST);
3038 } else { 3035 } else {
3039 free_irq_at(irq, cfg); 3036 free_irq_at(irq, cfg);
@@ -3058,7 +3055,7 @@ int create_irq(void)
3058 3055
3059void destroy_irq(unsigned int irq) 3056void destroy_irq(unsigned int irq)
3060{ 3057{
3061 struct irq_cfg *cfg = get_irq_chip_data(irq); 3058 struct irq_cfg *cfg = irq_get_chip_data(irq);
3062 unsigned long flags; 3059 unsigned long flags;
3063 3060
3064 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); 3061 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
@@ -3092,7 +3089,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3092 3089
3093 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3090 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3094 3091
3095 if (irq_remapped(get_irq_chip_data(irq))) { 3092 if (irq_remapped(cfg)) {
3096 struct irte irte; 3093 struct irte irte;
3097 int ir_index; 3094 int ir_index;
3098 u16 sub_handle; 3095 u16 sub_handle;
@@ -3264,6 +3261,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3264 3261
3265static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3262static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3266{ 3263{
3264 struct irq_chip *chip = &msi_chip;
3267 struct msi_msg msg; 3265 struct msi_msg msg;
3268 int ret; 3266 int ret;
3269 3267
@@ -3271,14 +3269,15 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3271 if (ret < 0) 3269 if (ret < 0)
3272 return ret; 3270 return ret;
3273 3271
3274 set_irq_msi(irq, msidesc); 3272 irq_set_msi_desc(irq, msidesc);
3275 write_msi_msg(irq, &msg); 3273 write_msi_msg(irq, &msg);
3276 3274
3277 if (irq_remapped(get_irq_chip_data(irq))) { 3275 if (irq_remapped(irq_get_chip_data(irq))) {
3278 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3276 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3279 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); 3277 chip = &msi_ir_chip;
3280 } else 3278 }
3281 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3279
3280 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3282 3281
3283 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); 3282 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3284 3283
@@ -3396,8 +3395,8 @@ int arch_setup_dmar_msi(unsigned int irq)
3396 if (ret < 0) 3395 if (ret < 0)
3397 return ret; 3396 return ret;
3398 dmar_msi_write(irq, &msg); 3397 dmar_msi_write(irq, &msg);
3399 set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, 3398 irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
3400 "edge"); 3399 "edge");
3401 return 0; 3400 return 0;
3402} 3401}
3403#endif 3402#endif
@@ -3455,6 +3454,7 @@ static struct irq_chip hpet_msi_type = {
3455 3454
3456int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3455int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3457{ 3456{
3457 struct irq_chip *chip = &hpet_msi_type;
3458 struct msi_msg msg; 3458 struct msi_msg msg;
3459 int ret; 3459 int ret;
3460 3460
@@ -3474,15 +3474,12 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3474 if (ret < 0) 3474 if (ret < 0)
3475 return ret; 3475 return ret;
3476 3476
3477 hpet_msi_write(get_irq_data(irq), &msg); 3477 hpet_msi_write(irq_get_handler_data(irq), &msg);
3478 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3478 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3479 if (irq_remapped(get_irq_chip_data(irq))) 3479 if (irq_remapped(irq_get_chip_data(irq)))
3480 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, 3480 chip = &ir_hpet_msi_type;
3481 handle_edge_irq, "edge");
3482 else
3483 set_irq_chip_and_handler_name(irq, &hpet_msi_type,
3484 handle_edge_irq, "edge");
3485 3481
3482 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3486 return 0; 3483 return 0;
3487} 3484}
3488#endif 3485#endif
@@ -3569,7 +3566,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3569 3566
3570 write_ht_irq_msg(irq, &msg); 3567 write_ht_irq_msg(irq, &msg);
3571 3568
3572 set_irq_chip_and_handler_name(irq, &ht_irq_chip, 3569 irq_set_chip_and_handler_name(irq, &ht_irq_chip,
3573 handle_edge_irq, "edge"); 3570 handle_edge_irq, "edge");
3574 3571
3575 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); 3572 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
@@ -3826,8 +3823,8 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
3826void __init setup_ioapic_dest(void) 3823void __init setup_ioapic_dest(void)
3827{ 3824{
3828 int pin, ioapic, irq, irq_entry; 3825 int pin, ioapic, irq, irq_entry;
3829 struct irq_desc *desc;
3830 const struct cpumask *mask; 3826 const struct cpumask *mask;
3827 struct irq_data *idata;
3831 3828
3832 if (skip_ioapic_setup == 1) 3829 if (skip_ioapic_setup == 1)
3833 return; 3830 return;
@@ -3842,21 +3839,20 @@ void __init setup_ioapic_dest(void)
3842 if ((ioapic > 0) && (irq > 16)) 3839 if ((ioapic > 0) && (irq > 16))
3843 continue; 3840 continue;
3844 3841
3845 desc = irq_to_desc(irq); 3842 idata = irq_get_irq_data(irq);
3846 3843
3847 /* 3844 /*
3848 * Honour affinities which have been set in early boot 3845 * Honour affinities which have been set in early boot
3849 */ 3846 */
3850 if (desc->status & 3847 if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
3851 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 3848 mask = idata->affinity;
3852 mask = desc->irq_data.affinity;
3853 else 3849 else
3854 mask = apic->target_cpus(); 3850 mask = apic->target_cpus();
3855 3851
3856 if (intr_remapping_enabled) 3852 if (intr_remapping_enabled)
3857 ir_ioapic_set_affinity(&desc->irq_data, mask, false); 3853 ir_ioapic_set_affinity(idata, mask, false);
3858 else 3854 else
3859 ioapic_set_affinity(&desc->irq_data, mask, false); 3855 ioapic_set_affinity(idata, mask, false);
3860 } 3856 }
3861 3857
3862} 3858}
@@ -4054,5 +4050,6 @@ void __init pre_init_apic_IRQ0(void)
4054 setup_local_APIC(); 4050 setup_local_APIC();
4055 4051
4056 io_apic_setup_irq_pin(0, 0, &attr); 4052 io_apic_setup_irq_pin(0, 0, &attr);
4057 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 4053 irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
4054 "edge");
4058} 4055}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e090a6f..cce91bf26676 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
56 local_irq_restore(flags); 56 local_irq_restore(flags);
57} 57}
58 58
59#ifdef CONFIG_X86_32
60
59void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, 61void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
60 int vector) 62 int vector)
61{ 63{
@@ -71,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
71 local_irq_save(flags); 73 local_irq_save(flags);
72 for_each_cpu(query_cpu, mask) 74 for_each_cpu(query_cpu, mask)
73 __default_send_IPI_dest_field( 75 __default_send_IPI_dest_field(
74 apic->cpu_to_logical_apicid(query_cpu), vector, 76 early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
75 apic->dest_logical); 77 vector, apic->dest_logical);
76 local_irq_restore(flags); 78 local_irq_restore(flags);
77} 79}
78 80
@@ -90,14 +92,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
90 if (query_cpu == this_cpu) 92 if (query_cpu == this_cpu)
91 continue; 93 continue;
92 __default_send_IPI_dest_field( 94 __default_send_IPI_dest_field(
93 apic->cpu_to_logical_apicid(query_cpu), vector, 95 early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
94 apic->dest_logical); 96 vector, apic->dest_logical);
95 } 97 }
96 local_irq_restore(flags); 98 local_irq_restore(flags);
97} 99}
98 100
99#ifdef CONFIG_X86_32
100
101/* 101/*
102 * This is only used on smaller machines. 102 * This is only used on smaller machines.
103 */ 103 */
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 960f26ab5c9f..6273eee5134b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
373 return physids_promote(0xFUL, retmap); 373 return physids_promote(0xFUL, retmap);
374} 374}
375 375
376static inline int numaq_cpu_to_logical_apicid(int cpu)
377{
378 if (cpu >= nr_cpu_ids)
379 return BAD_APICID;
380 return cpu_2_logical_apicid[cpu];
381}
382
383/* 376/*
384 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent 377 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
385 * cpu to APIC ID relation to properly interact with the intelligent 378 * cpu to APIC ID relation to properly interact with the intelligent
@@ -398,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
398 return logical_apicid >> 4; 391 return logical_apicid >> 4;
399} 392}
400 393
394static int numaq_numa_cpu_node(int cpu)
395{
396 int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
397
398 if (logical_apicid != BAD_APICID)
399 return numaq_apicid_to_node(logical_apicid);
400 return NUMA_NO_NODE;
401}
402
401static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) 403static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
402{ 404{
403 int node = numaq_apicid_to_node(logical_apicid); 405 int node = numaq_apicid_to_node(logical_apicid);
@@ -508,8 +510,6 @@ struct apic __refdata apic_numaq = {
508 .ioapic_phys_id_map = numaq_ioapic_phys_id_map, 510 .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
509 .setup_apic_routing = numaq_setup_apic_routing, 511 .setup_apic_routing = numaq_setup_apic_routing,
510 .multi_timer_check = numaq_multi_timer_check, 512 .multi_timer_check = numaq_multi_timer_check,
511 .apicid_to_node = numaq_apicid_to_node,
512 .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
513 .cpu_present_to_apicid = numaq_cpu_present_to_apicid, 513 .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
514 .apicid_to_cpu_present = numaq_apicid_to_cpu_present, 514 .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
515 .setup_portio_remap = numaq_setup_portio_remap, 515 .setup_portio_remap = numaq_setup_portio_remap,
@@ -547,4 +547,7 @@ struct apic __refdata apic_numaq = {
547 .icr_write = native_apic_icr_write, 547 .icr_write = native_apic_icr_write,
548 .wait_icr_idle = native_apic_wait_icr_idle, 548 .wait_icr_idle = native_apic_wait_icr_idle,
549 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 549 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
550
551 .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
552 .x86_32_numa_cpu_node = numaq_numa_cpu_node,
550}; 553};
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe016084..fc84c7b61108 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void)
77 apic->setup_apic_routing(); 77 apic->setup_apic_routing();
78} 78}
79 79
80static int default_x86_32_early_logical_apicid(int cpu)
81{
82 return 1 << cpu;
83}
84
80static void setup_apic_flat_routing(void) 85static void setup_apic_flat_routing(void)
81{ 86{
82#ifdef CONFIG_X86_IO_APIC 87#ifdef CONFIG_X86_IO_APIC
@@ -130,8 +135,6 @@ struct apic apic_default = {
130 .ioapic_phys_id_map = default_ioapic_phys_id_map, 135 .ioapic_phys_id_map = default_ioapic_phys_id_map,
131 .setup_apic_routing = setup_apic_flat_routing, 136 .setup_apic_routing = setup_apic_flat_routing,
132 .multi_timer_check = NULL, 137 .multi_timer_check = NULL,
133 .apicid_to_node = default_apicid_to_node,
134 .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
135 .cpu_present_to_apicid = default_cpu_present_to_apicid, 138 .cpu_present_to_apicid = default_cpu_present_to_apicid,
136 .apicid_to_cpu_present = physid_set_mask_of_physid, 139 .apicid_to_cpu_present = physid_set_mask_of_physid,
137 .setup_portio_remap = NULL, 140 .setup_portio_remap = NULL,
@@ -167,6 +170,9 @@ struct apic apic_default = {
167 .icr_write = native_apic_icr_write, 170 .icr_write = native_apic_icr_write,
168 .wait_icr_idle = native_apic_wait_icr_idle, 171 .wait_icr_idle = native_apic_wait_icr_idle,
169 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 172 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
173
174 .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
175 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
170}; 176};
171 177
172extern struct apic apic_numaq; 178extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b419263d90d..e4b8059b414a 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
194 return 1; 194 return 1;
195} 195}
196 196
197static void summit_init_apic_ldr(void) 197static int summit_early_logical_apicid(int cpu)
198{ 198{
199 unsigned long val, id;
200 int count = 0; 199 int count = 0;
201 u8 my_id = (u8)hard_smp_processor_id(); 200 u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
202 u8 my_cluster = APIC_CLUSTER(my_id); 201 u8 my_cluster = APIC_CLUSTER(my_id);
203#ifdef CONFIG_SMP 202#ifdef CONFIG_SMP
204 u8 lid; 203 u8 lid;
@@ -206,7 +205,7 @@ static void summit_init_apic_ldr(void)
206 205
207 /* Create logical APIC IDs by counting CPUs already in cluster. */ 206 /* Create logical APIC IDs by counting CPUs already in cluster. */
208 for (count = 0, i = nr_cpu_ids; --i >= 0; ) { 207 for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
209 lid = cpu_2_logical_apicid[i]; 208 lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
210 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) 209 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
211 ++count; 210 ++count;
212 } 211 }
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
214 /* We only have a 4 wide bitmap in cluster mode. If a deranged 213 /* We only have a 4 wide bitmap in cluster mode. If a deranged
215 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ 214 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
216 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); 215 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
217 id = my_cluster | (1UL << count); 216 return my_cluster | (1UL << count);
217}
218
219static void summit_init_apic_ldr(void)
220{
221 int cpu = smp_processor_id();
222 unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
223 unsigned long val;
224
218 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); 225 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
219 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; 226 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
220 val |= SET_APIC_LOGICAL_ID(id); 227 val |= SET_APIC_LOGICAL_ID(id);
@@ -232,27 +239,6 @@ static void summit_setup_apic_routing(void)
232 nr_ioapics); 239 nr_ioapics);
233} 240}
234 241
235static int summit_apicid_to_node(int logical_apicid)
236{
237#ifdef CONFIG_SMP
238 return apicid_2_node[hard_smp_processor_id()];
239#else
240 return 0;
241#endif
242}
243
244/* Mapping from cpu number to logical apicid */
245static inline int summit_cpu_to_logical_apicid(int cpu)
246{
247#ifdef CONFIG_SMP
248 if (cpu >= nr_cpu_ids)
249 return BAD_APICID;
250 return cpu_2_logical_apicid[cpu];
251#else
252 return logical_smp_processor_id();
253#endif
254}
255
256static int summit_cpu_present_to_apicid(int mps_cpu) 242static int summit_cpu_present_to_apicid(int mps_cpu)
257{ 243{
258 if (mps_cpu < nr_cpu_ids) 244 if (mps_cpu < nr_cpu_ids)
@@ -286,7 +272,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
286 * The cpus in the mask must all be on the apic cluster. 272 * The cpus in the mask must all be on the apic cluster.
287 */ 273 */
288 for_each_cpu(cpu, cpumask) { 274 for_each_cpu(cpu, cpumask) {
289 int new_apicid = summit_cpu_to_logical_apicid(cpu); 275 int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
290 276
291 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { 277 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
292 printk("%s: Not a valid mask!\n", __func__); 278 printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +287,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
301static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, 287static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
302 const struct cpumask *andmask) 288 const struct cpumask *andmask)
303{ 289{
304 int apicid = summit_cpu_to_logical_apicid(0); 290 int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
305 cpumask_var_t cpumask; 291 cpumask_var_t cpumask;
306 292
307 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) 293 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -528,8 +514,6 @@ struct apic apic_summit = {
528 .ioapic_phys_id_map = summit_ioapic_phys_id_map, 514 .ioapic_phys_id_map = summit_ioapic_phys_id_map,
529 .setup_apic_routing = summit_setup_apic_routing, 515 .setup_apic_routing = summit_setup_apic_routing,
530 .multi_timer_check = NULL, 516 .multi_timer_check = NULL,
531 .apicid_to_node = summit_apicid_to_node,
532 .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
533 .cpu_present_to_apicid = summit_cpu_present_to_apicid, 517 .cpu_present_to_apicid = summit_cpu_present_to_apicid,
534 .apicid_to_cpu_present = summit_apicid_to_cpu_present, 518 .apicid_to_cpu_present = summit_apicid_to_cpu_present,
535 .setup_portio_remap = NULL, 519 .setup_portio_remap = NULL,
@@ -565,4 +549,7 @@ struct apic apic_summit = {
565 .icr_write = native_apic_icr_write, 549 .icr_write = native_apic_icr_write,
566 .wait_icr_idle = native_apic_wait_icr_idle, 550 .wait_icr_idle = native_apic_wait_icr_idle,
567 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 551 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
552
553 .x86_32_early_logical_apicid = summit_early_logical_apicid,
554 .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
568}; 555};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59f4910..90949bbd566d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -206,8 +206,6 @@ struct apic apic_x2apic_cluster = {
206 .ioapic_phys_id_map = NULL, 206 .ioapic_phys_id_map = NULL,
207 .setup_apic_routing = NULL, 207 .setup_apic_routing = NULL,
208 .multi_timer_check = NULL, 208 .multi_timer_check = NULL,
209 .apicid_to_node = NULL,
210 .cpu_to_logical_apicid = NULL,
211 .cpu_present_to_apicid = default_cpu_present_to_apicid, 209 .cpu_present_to_apicid = default_cpu_present_to_apicid,
212 .apicid_to_cpu_present = NULL, 210 .apicid_to_cpu_present = NULL,
213 .setup_portio_remap = NULL, 211 .setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38c5ced..c7e6d6645bf4 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -195,8 +195,6 @@ struct apic apic_x2apic_phys = {
195 .ioapic_phys_id_map = NULL, 195 .ioapic_phys_id_map = NULL,
196 .setup_apic_routing = NULL, 196 .setup_apic_routing = NULL,
197 .multi_timer_check = NULL, 197 .multi_timer_check = NULL,
198 .apicid_to_node = NULL,
199 .cpu_to_logical_apicid = NULL,
200 .cpu_present_to_apicid = default_cpu_present_to_apicid, 198 .cpu_present_to_apicid = default_cpu_present_to_apicid,
201 .apicid_to_cpu_present = NULL, 199 .apicid_to_cpu_present = NULL,
202 .setup_portio_remap = NULL, 200 .setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index bd16b58b8850..3c289281394c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -338,8 +338,6 @@ struct apic __refdata apic_x2apic_uv_x = {
338 .ioapic_phys_id_map = NULL, 338 .ioapic_phys_id_map = NULL,
339 .setup_apic_routing = NULL, 339 .setup_apic_routing = NULL,
340 .multi_timer_check = NULL, 340 .multi_timer_check = NULL,
341 .apicid_to_node = NULL,
342 .cpu_to_logical_apicid = NULL,
343 .cpu_present_to_apicid = default_cpu_present_to_apicid, 341 .cpu_present_to_apicid = default_cpu_present_to_apicid,
344 .apicid_to_cpu_present = NULL, 342 .apicid_to_cpu_present = NULL,
345 .setup_portio_remap = NULL, 343 .setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cfa82c899f47..4f13fafc5264 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -1,5 +1,70 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed to extract
4 * and format the required data.
5 */
6#define COMPILE_OFFSETS
7
8#include <linux/crypto.h>
9#include <linux/sched.h>
10#include <linux/stddef.h>
11#include <linux/hardirq.h>
12#include <linux/suspend.h>
13#include <linux/kbuild.h>
14#include <asm/processor.h>
15#include <asm/thread_info.h>
16#include <asm/sigframe.h>
17#include <asm/bootparam.h>
18#include <asm/suspend.h>
19
20#ifdef CONFIG_XEN
21#include <xen/interface/xen.h>
22#endif
23
1#ifdef CONFIG_X86_32 24#ifdef CONFIG_X86_32
2# include "asm-offsets_32.c" 25# include "asm-offsets_32.c"
3#else 26#else
4# include "asm-offsets_64.c" 27# include "asm-offsets_64.c"
5#endif 28#endif
29
30void common(void) {
31 BLANK();
32 OFFSET(TI_flags, thread_info, flags);
33 OFFSET(TI_status, thread_info, status);
34 OFFSET(TI_addr_limit, thread_info, addr_limit);
35 OFFSET(TI_preempt_count, thread_info, preempt_count);
36
37 BLANK();
38 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
39
40 BLANK();
41 OFFSET(pbe_address, pbe, address);
42 OFFSET(pbe_orig_address, pbe, orig_address);
43 OFFSET(pbe_next, pbe, next);
44
45#ifdef CONFIG_PARAVIRT
46 BLANK();
47 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
48 OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
49 OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
50 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
51 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
52 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
53 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
54 OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
55 OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
56#endif
57
58#ifdef CONFIG_XEN
59 BLANK();
60 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
61 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
62#endif
63
64 BLANK();
65 OFFSET(BP_scratch, boot_params, scratch);
66 OFFSET(BP_loadflags, boot_params, hdr.loadflags);
67 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
68 OFFSET(BP_version, boot_params, hdr.version);
69 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
70}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37a..c29d631af6fc 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,26 +1,4 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed
4 * to extract and format the required data.
5 */
6
7#include <linux/crypto.h>
8#include <linux/sched.h>
9#include <linux/signal.h>
10#include <linux/personality.h>
11#include <linux/suspend.h>
12#include <linux/kbuild.h>
13#include <asm/ucontext.h> 1#include <asm/ucontext.h>
14#include <asm/sigframe.h>
15#include <asm/pgtable.h>
16#include <asm/fixmap.h>
17#include <asm/processor.h>
18#include <asm/thread_info.h>
19#include <asm/bootparam.h>
20#include <asm/elf.h>
21#include <asm/suspend.h>
22
23#include <xen/interface/xen.h>
24 2
25#include <linux/lguest.h> 3#include <linux/lguest.h>
26#include "../../../drivers/lguest/lg.h" 4#include "../../../drivers/lguest/lg.h"
@@ -51,21 +29,10 @@ void foo(void)
51 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); 29 OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
52 BLANK(); 30 BLANK();
53 31
54 OFFSET(TI_task, thread_info, task);
55 OFFSET(TI_exec_domain, thread_info, exec_domain);
56 OFFSET(TI_flags, thread_info, flags);
57 OFFSET(TI_status, thread_info, status);
58 OFFSET(TI_preempt_count, thread_info, preempt_count);
59 OFFSET(TI_addr_limit, thread_info, addr_limit);
60 OFFSET(TI_restart_block, thread_info, restart_block);
61 OFFSET(TI_sysenter_return, thread_info, sysenter_return); 32 OFFSET(TI_sysenter_return, thread_info, sysenter_return);
62 OFFSET(TI_cpu, thread_info, cpu); 33 OFFSET(TI_cpu, thread_info, cpu);
63 BLANK(); 34 BLANK();
64 35
65 OFFSET(GDS_size, desc_ptr, size);
66 OFFSET(GDS_address, desc_ptr, address);
67 BLANK();
68
69 OFFSET(PT_EBX, pt_regs, bx); 36 OFFSET(PT_EBX, pt_regs, bx);
70 OFFSET(PT_ECX, pt_regs, cx); 37 OFFSET(PT_ECX, pt_regs, cx);
71 OFFSET(PT_EDX, pt_regs, dx); 38 OFFSET(PT_EDX, pt_regs, dx);
@@ -85,42 +52,13 @@ void foo(void)
85 OFFSET(PT_OLDSS, pt_regs, ss); 52 OFFSET(PT_OLDSS, pt_regs, ss);
86 BLANK(); 53 BLANK();
87 54
88 OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
89 OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); 55 OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
90 BLANK(); 56 BLANK();
91 57
92 OFFSET(pbe_address, pbe, address);
93 OFFSET(pbe_orig_address, pbe, orig_address);
94 OFFSET(pbe_next, pbe, next);
95
96 /* Offset from the sysenter stack to tss.sp0 */ 58 /* Offset from the sysenter stack to tss.sp0 */
97 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - 59 DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
98 sizeof(struct tss_struct)); 60 sizeof(struct tss_struct));
99 61
100 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
101 DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
102 DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
103
104 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
105
106#ifdef CONFIG_PARAVIRT
107 BLANK();
108 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
109 OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
110 OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
111 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
112 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
113 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
114 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
115 OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
116#endif
117
118#ifdef CONFIG_XEN
119 BLANK();
120 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
121 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
122#endif
123
124#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) 62#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
125 BLANK(); 63 BLANK();
126 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); 64 OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
@@ -139,11 +77,4 @@ void foo(void)
139 OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); 77 OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
140 OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); 78 OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
141#endif 79#endif
142
143 BLANK();
144 OFFSET(BP_scratch, boot_params, scratch);
145 OFFSET(BP_loadflags, boot_params, hdr.loadflags);
146 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
147 OFFSET(BP_version, boot_params, hdr.version);
148 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
149} 80}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd965..e72a1194af22 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,27 +1,4 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed to extract
4 * and format the required data.
5 */
6#define COMPILE_OFFSETS
7
8#include <linux/crypto.h>
9#include <linux/sched.h>
10#include <linux/stddef.h>
11#include <linux/errno.h>
12#include <linux/hardirq.h>
13#include <linux/suspend.h>
14#include <linux/kbuild.h>
15#include <asm/processor.h>
16#include <asm/segment.h>
17#include <asm/thread_info.h>
18#include <asm/ia32.h> 1#include <asm/ia32.h>
19#include <asm/bootparam.h>
20#include <asm/suspend.h>
21
22#include <xen/interface/xen.h>
23
24#include <asm/sigframe.h>
25 2
26#define __NO_STUBS 1 3#define __NO_STUBS 1
27#undef __SYSCALL 4#undef __SYSCALL
@@ -33,41 +10,19 @@ static char syscalls[] = {
33 10
34int main(void) 11int main(void)
35{ 12{
36#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
37 ENTRY(state);
38 ENTRY(flags);
39 ENTRY(pid);
40 BLANK();
41#undef ENTRY
42#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
43 ENTRY(flags);
44 ENTRY(addr_limit);
45 ENTRY(preempt_count);
46 ENTRY(status);
47#ifdef CONFIG_IA32_EMULATION
48 ENTRY(sysenter_return);
49#endif
50 BLANK();
51#undef ENTRY
52#ifdef CONFIG_PARAVIRT 13#ifdef CONFIG_PARAVIRT
53 BLANK();
54 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
55 OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
56 OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
57 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
58 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
59 OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); 14 OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
60 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
61 OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); 15 OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
62 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); 16 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
63 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
64 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 17 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
65 OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); 18 BLANK();
66#endif 19#endif
67 20
68
69#ifdef CONFIG_IA32_EMULATION 21#ifdef CONFIG_IA32_EMULATION
70#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) 22 OFFSET(TI_sysenter_return, thread_info, sysenter_return);
23 BLANK();
24
25#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
71 ENTRY(ax); 26 ENTRY(ax);
72 ENTRY(bx); 27 ENTRY(bx);
73 ENTRY(cx); 28 ENTRY(cx);
@@ -79,15 +34,12 @@ int main(void)
79 ENTRY(ip); 34 ENTRY(ip);
80 BLANK(); 35 BLANK();
81#undef ENTRY 36#undef ENTRY
82 DEFINE(IA32_RT_SIGFRAME_sigcontext, 37
83 offsetof (struct rt_sigframe_ia32, uc.uc_mcontext)); 38 OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
84 BLANK(); 39 BLANK();
85#endif 40#endif
86 DEFINE(pbe_address, offsetof(struct pbe, address)); 41
87 DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); 42#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
88 DEFINE(pbe_next, offsetof(struct pbe, next));
89 BLANK();
90#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
91 ENTRY(bx); 43 ENTRY(bx);
92 ENTRY(bx); 44 ENTRY(bx);
93 ENTRY(cx); 45 ENTRY(cx);
@@ -107,7 +59,8 @@ int main(void)
107 ENTRY(flags); 59 ENTRY(flags);
108 BLANK(); 60 BLANK();
109#undef ENTRY 61#undef ENTRY
110#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) 62
63#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry)
111 ENTRY(cr0); 64 ENTRY(cr0);
112 ENTRY(cr2); 65 ENTRY(cr2);
113 ENTRY(cr3); 66 ENTRY(cr3);
@@ -115,26 +68,11 @@ int main(void)
115 ENTRY(cr8); 68 ENTRY(cr8);
116 BLANK(); 69 BLANK();
117#undef ENTRY 70#undef ENTRY
118 DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
119 BLANK();
120 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
121 BLANK();
122 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
123 71
72 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
124 BLANK(); 73 BLANK();
125 OFFSET(BP_scratch, boot_params, scratch);
126 OFFSET(BP_loadflags, boot_params, hdr.loadflags);
127 OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
128 OFFSET(BP_version, boot_params, hdr.version);
129 OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
130 74
131 BLANK(); 75 DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
132 DEFINE(PAGE_SIZE_asm, PAGE_SIZE); 76
133#ifdef CONFIG_XEN
134 BLANK();
135 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
136 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
137#undef ENTRY
138#endif
139 return 0; 77 return 0;
140} 78}
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 13a389179514..452932d34730 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void)
106 addr += size; 106 addr += size;
107 } 107 }
108 108
109 printk(KERN_INFO "Scanning %d areas for low memory corruption\n", 109 if (num_scan_areas)
110 num_scan_areas); 110 printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas);
111} 111}
112 112
113 113
@@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy)
143{ 143{
144 check_for_bios_corruption(); 144 check_for_bios_corruption();
145 schedule_delayed_work(&bios_check_work, 145 schedule_delayed_work(&bios_check_work,
146 round_jiffies_relative(corruption_check_period*HZ)); 146 round_jiffies_relative(corruption_check_period*HZ));
147} 147}
148 148
149static int start_periodic_check_for_corruption(void) 149static int start_periodic_check_for_corruption(void)
150{ 150{
151 if (!memory_corruption_check || corruption_check_period == 0) 151 if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0)
152 return 0; 152 return 0;
153 153
154 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", 154 printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb83c5a..f771ab6b49e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
233} 233}
234#endif 234#endif
235 235
236#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 236#ifdef CONFIG_NUMA
237/*
238 * To workaround broken NUMA config. Read the comment in
239 * srat_detect_node().
240 */
237static int __cpuinit nearby_node(int apicid) 241static int __cpuinit nearby_node(int apicid)
238{ 242{
239 int i, node; 243 int i, node;
240 244
241 for (i = apicid - 1; i >= 0; i--) { 245 for (i = apicid - 1; i >= 0; i--) {
242 node = apicid_to_node[i]; 246 node = __apicid_to_node[i];
243 if (node != NUMA_NO_NODE && node_online(node)) 247 if (node != NUMA_NO_NODE && node_online(node))
244 return node; 248 return node;
245 } 249 }
246 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { 250 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
247 node = apicid_to_node[i]; 251 node = __apicid_to_node[i];
248 if (node != NUMA_NO_NODE && node_online(node)) 252 if (node != NUMA_NO_NODE && node_online(node))
249 return node; 253 return node;
250 } 254 }
@@ -261,7 +265,7 @@ static int __cpuinit nearby_node(int apicid)
261#ifdef CONFIG_X86_HT 265#ifdef CONFIG_X86_HT
262static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) 266static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
263{ 267{
264 u32 nodes; 268 u32 nodes, cores_per_cu = 1;
265 u8 node_id; 269 u8 node_id;
266 int cpu = smp_processor_id(); 270 int cpu = smp_processor_id();
267 271
@@ -276,6 +280,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
276 /* get compute unit information */ 280 /* get compute unit information */
277 smp_num_siblings = ((ebx >> 8) & 3) + 1; 281 smp_num_siblings = ((ebx >> 8) & 3) + 1;
278 c->compute_unit_id = ebx & 0xff; 282 c->compute_unit_id = ebx & 0xff;
283 cores_per_cu += ((ebx >> 8) & 3);
279 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { 284 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
280 u64 value; 285 u64 value;
281 286
@@ -288,15 +293,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
288 /* fixup multi-node processor information */ 293 /* fixup multi-node processor information */
289 if (nodes > 1) { 294 if (nodes > 1) {
290 u32 cores_per_node; 295 u32 cores_per_node;
296 u32 cus_per_node;
291 297
292 set_cpu_cap(c, X86_FEATURE_AMD_DCM); 298 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
293 cores_per_node = c->x86_max_cores / nodes; 299 cores_per_node = c->x86_max_cores / nodes;
300 cus_per_node = cores_per_node / cores_per_cu;
294 301
295 /* store NodeID, use llc_shared_map to store sibling info */ 302 /* store NodeID, use llc_shared_map to store sibling info */
296 per_cpu(cpu_llc_id, cpu) = node_id; 303 per_cpu(cpu_llc_id, cpu) = node_id;
297 304
298 /* core id to be in range from 0 to (cores_per_node - 1) */ 305 /* core id has to be in the [0 .. cores_per_node - 1] range */
299 c->cpu_core_id = c->cpu_core_id % cores_per_node; 306 c->cpu_core_id %= cores_per_node;
307 c->compute_unit_id %= cus_per_node;
300 } 308 }
301} 309}
302#endif 310#endif
@@ -334,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
334 342
335static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 343static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
336{ 344{
337#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 345#ifdef CONFIG_NUMA
338 int cpu = smp_processor_id(); 346 int cpu = smp_processor_id();
339 int node; 347 int node;
340 unsigned apicid = c->apicid; 348 unsigned apicid = c->apicid;
341 349
342 node = per_cpu(cpu_llc_id, cpu); 350 node = numa_cpu_node(cpu);
351 if (node == NUMA_NO_NODE)
352 node = per_cpu(cpu_llc_id, cpu);
343 353
344 if (apicid_to_node[apicid] != NUMA_NO_NODE)
345 node = apicid_to_node[apicid];
346 if (!node_online(node)) { 354 if (!node_online(node)) {
347 /* Two possibilities here: 355 /*
348 - The CPU is missing memory and no node was created. 356 * Two possibilities here:
349 In that case try picking one from a nearby CPU 357 *
350 - The APIC IDs differ from the HyperTransport node IDs 358 * - The CPU is missing memory and no node was created. In
351 which the K8 northbridge parsing fills in. 359 * that case try picking one from a nearby CPU.
352 Assume they are all increased by a constant offset, 360 *
353 but in the same order as the HT nodeids. 361 * - The APIC IDs differ from the HyperTransport node IDs
354 If that doesn't result in a usable node fall back to the 362 * which the K8 northbridge parsing fills in. Assume
355 path for the previous case. */ 363 * they are all increased by a constant offset, but in
356 364 * the same order as the HT nodeids. If that doesn't
365 * result in a usable node fall back to the path for the
366 * previous case.
367 *
368 * This workaround operates directly on the mapping between
369 * APIC ID and NUMA node, assuming certain relationship
370 * between APIC ID, HT node ID and NUMA topology. As going
371 * through CPU mapping may alter the outcome, directly
372 * access __apicid_to_node[].
373 */
357 int ht_nodeid = c->initial_apicid; 374 int ht_nodeid = c->initial_apicid;
358 375
359 if (ht_nodeid >= 0 && 376 if (ht_nodeid >= 0 &&
360 apicid_to_node[ht_nodeid] != NUMA_NO_NODE) 377 __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
361 node = apicid_to_node[ht_nodeid]; 378 node = __apicid_to_node[ht_nodeid];
362 /* Pick a nearby node */ 379 /* Pick a nearby node */
363 if (!node_online(node)) 380 if (!node_online(node))
364 node = nearby_node(apicid); 381 node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396bd..e2ced0074a45 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,7 +675,7 @@ void __init early_cpu_init(void)
675 const struct cpu_dev *const *cdev; 675 const struct cpu_dev *const *cdev;
676 int count = 0; 676 int count = 0;
677 677
678#ifdef PROCESSOR_SELECT 678#ifdef CONFIG_PROCESSOR_SELECT
679 printk(KERN_INFO "KERNEL supported cpus:\n"); 679 printk(KERN_INFO "KERNEL supported cpus:\n");
680#endif 680#endif
681 681
@@ -687,7 +687,7 @@ void __init early_cpu_init(void)
687 cpu_devs[count] = cpudev; 687 cpu_devs[count] = cpudev;
688 count++; 688 count++;
689 689
690#ifdef PROCESSOR_SELECT 690#ifdef CONFIG_PROCESSOR_SELECT
691 { 691 {
692 unsigned int j; 692 unsigned int j;
693 693
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
869 869
870 select_idle_routine(c); 870 select_idle_routine(c);
871 871
872#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 872#ifdef CONFIG_NUMA
873 numa_add_cpu(smp_processor_id()); 873 numa_add_cpu(smp_processor_id());
874#endif 874#endif
875} 875}
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index bd1cac747f67..52c93648e492 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -158,9 +158,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
158{ 158{
159 if (c->x86 == 0x06) { 159 if (c->x86 == 0x06) {
160 if (cpu_has(c, X86_FEATURE_EST)) 160 if (cpu_has(c, X86_FEATURE_EST))
161 printk(KERN_WARNING PFX "Warning: EST-capable CPU " 161 printk_once(KERN_WARNING PFX "Warning: EST-capable "
162 "detected. The acpi-cpufreq module offers " 162 "CPU detected. The acpi-cpufreq module offers "
163 "voltage scaling in addition of frequency " 163 "voltage scaling in addition to frequency "
164 "scaling. You should use that instead of " 164 "scaling. You should use that instead of "
165 "p4-clockmod, if possible.\n"); 165 "p4-clockmod, if possible.\n");
166 switch (c->x86_model) { 166 switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679f2799..4a5a42b842ad 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
195cmd_incomplete: 195cmd_incomplete:
196 iowrite16(0, &pcch_hdr->status); 196 iowrite16(0, &pcch_hdr->status);
197 spin_unlock(&pcc_lock); 197 spin_unlock(&pcc_lock);
198 return -EINVAL; 198 return 0;
199} 199}
200 200
201static int pcc_cpufreq_target(struct cpufreq_policy *policy, 201static int pcc_cpufreq_target(struct cpufreq_policy *policy,
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 35c7e65e59be..c567dec854f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1537,6 +1537,7 @@ static struct notifier_block cpb_nb = {
1537static int __cpuinit powernowk8_init(void) 1537static int __cpuinit powernowk8_init(void)
1538{ 1538{
1539 unsigned int i, supported_cpus = 0, cpu; 1539 unsigned int i, supported_cpus = 0, cpu;
1540 int rv;
1540 1541
1541 for_each_online_cpu(i) { 1542 for_each_online_cpu(i) {
1542 int rc; 1543 int rc;
@@ -1555,14 +1556,14 @@ static int __cpuinit powernowk8_init(void)
1555 1556
1556 cpb_capable = true; 1557 cpb_capable = true;
1557 1558
1558 register_cpu_notifier(&cpb_nb);
1559
1560 msrs = msrs_alloc(); 1559 msrs = msrs_alloc();
1561 if (!msrs) { 1560 if (!msrs) {
1562 printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); 1561 printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
1563 return -ENOMEM; 1562 return -ENOMEM;
1564 } 1563 }
1565 1564
1565 register_cpu_notifier(&cpb_nb);
1566
1566 rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); 1567 rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1567 1568
1568 for_each_cpu(cpu, cpu_online_mask) { 1569 for_each_cpu(cpu, cpu_online_mask) {
@@ -1574,7 +1575,13 @@ static int __cpuinit powernowk8_init(void)
1574 (cpb_enabled ? "on" : "off")); 1575 (cpb_enabled ? "on" : "off"));
1575 } 1576 }
1576 1577
1577 return cpufreq_register_driver(&cpufreq_amd64_driver); 1578 rv = cpufreq_register_driver(&cpufreq_amd64_driver);
1579 if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
1580 unregister_cpu_notifier(&cpb_nb);
1581 msrs_free(msrs);
1582 msrs = NULL;
1583 }
1584 return rv;
1578} 1585}
1579 1586
1580/* driver entry point for term */ 1587/* driver entry point for term */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c53d6bf..df86bc8c859d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,14 +276,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
276 276
277static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 277static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
278{ 278{
279#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 279#ifdef CONFIG_NUMA
280 unsigned node; 280 unsigned node;
281 int cpu = smp_processor_id(); 281 int cpu = smp_processor_id();
282 int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
283 282
284 /* Don't do the funky fallback heuristics the AMD version employs 283 /* Don't do the funky fallback heuristics the AMD version employs
285 for now. */ 284 for now. */
286 node = apicid_to_node[apicid]; 285 node = numa_cpu_node(cpu);
287 if (node == NUMA_NO_NODE || !node_online(node)) { 286 if (node == NUMA_NO_NODE || !node_online(node)) {
288 /* reuse the value from init_cpu_to_node() */ 287 /* reuse the value from init_cpu_to_node() */
289 node = cpu_to_node(cpu); 288 node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ec2c19a7b8ef..1ce1af2899df 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -304,8 +304,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
304 304
305struct _cache_attr { 305struct _cache_attr {
306 struct attribute attr; 306 struct attribute attr;
307 ssize_t (*show)(struct _cpuid4_info *, char *); 307 ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
308 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); 308 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
309 unsigned int);
309}; 310};
310 311
311#ifdef CONFIG_AMD_NB 312#ifdef CONFIG_AMD_NB
@@ -400,7 +401,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
400 401
401#define SHOW_CACHE_DISABLE(slot) \ 402#define SHOW_CACHE_DISABLE(slot) \
402static ssize_t \ 403static ssize_t \
403show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ 404show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
405 unsigned int cpu) \
404{ \ 406{ \
405 return show_cache_disable(this_leaf, buf, slot); \ 407 return show_cache_disable(this_leaf, buf, slot); \
406} 408}
@@ -512,7 +514,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
512#define STORE_CACHE_DISABLE(slot) \ 514#define STORE_CACHE_DISABLE(slot) \
513static ssize_t \ 515static ssize_t \
514store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 516store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
515 const char *buf, size_t count) \ 517 const char *buf, size_t count, \
518 unsigned int cpu) \
516{ \ 519{ \
517 return store_cache_disable(this_leaf, buf, count, slot); \ 520 return store_cache_disable(this_leaf, buf, count, slot); \
518} 521}
@@ -524,6 +527,39 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
524static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 527static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
525 show_cache_disable_1, store_cache_disable_1); 528 show_cache_disable_1, store_cache_disable_1);
526 529
530static ssize_t
531show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
532{
533 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
534 return -EINVAL;
535
536 return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
537}
538
539static ssize_t
540store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
541 unsigned int cpu)
542{
543 unsigned long val;
544
545 if (!capable(CAP_SYS_ADMIN))
546 return -EPERM;
547
548 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
549 return -EINVAL;
550
551 if (strict_strtoul(buf, 16, &val) < 0)
552 return -EINVAL;
553
554 if (amd_set_subcaches(cpu, val))
555 return -EINVAL;
556
557 return count;
558}
559
560static struct _cache_attr subcaches =
561 __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
562
527#else /* CONFIG_AMD_NB */ 563#else /* CONFIG_AMD_NB */
528#define amd_init_l3_cache(x, y) 564#define amd_init_l3_cache(x, y)
529#endif /* CONFIG_AMD_NB */ 565#endif /* CONFIG_AMD_NB */
@@ -532,9 +568,9 @@ static int
532__cpuinit cpuid4_cache_lookup_regs(int index, 568__cpuinit cpuid4_cache_lookup_regs(int index,
533 struct _cpuid4_info_regs *this_leaf) 569 struct _cpuid4_info_regs *this_leaf)
534{ 570{
535 union _cpuid4_leaf_eax eax; 571 union _cpuid4_leaf_eax eax;
536 union _cpuid4_leaf_ebx ebx; 572 union _cpuid4_leaf_ebx ebx;
537 union _cpuid4_leaf_ecx ecx; 573 union _cpuid4_leaf_ecx ecx;
538 unsigned edx; 574 unsigned edx;
539 575
540 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 576 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
@@ -732,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
732 struct cpuinfo_x86 *c = &cpu_data(cpu); 768 struct cpuinfo_x86 *c = &cpu_data(cpu);
733 769
734 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { 770 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
735 for_each_cpu(i, c->llc_shared_map) { 771 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
736 if (!per_cpu(ici_cpuid4_info, i)) 772 if (!per_cpu(ici_cpuid4_info, i))
737 continue; 773 continue;
738 this_leaf = CPUID4_INFO_IDX(i, index); 774 this_leaf = CPUID4_INFO_IDX(i, index);
739 for_each_cpu(sibling, c->llc_shared_map) { 775 for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
740 if (!cpu_online(sibling)) 776 if (!cpu_online(sibling))
741 continue; 777 continue;
742 set_bit(sibling, this_leaf->shared_cpu_map); 778 set_bit(sibling, this_leaf->shared_cpu_map);
@@ -870,8 +906,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
870#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) 906#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
871 907
872#define show_one_plus(file_name, object, val) \ 908#define show_one_plus(file_name, object, val) \
873static ssize_t show_##file_name \ 909static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
874 (struct _cpuid4_info *this_leaf, char *buf) \ 910 unsigned int cpu) \
875{ \ 911{ \
876 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ 912 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
877} 913}
@@ -882,7 +918,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
882show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); 918show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
883show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); 919show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
884 920
885static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) 921static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
922 unsigned int cpu)
886{ 923{
887 return sprintf(buf, "%luK\n", this_leaf->size / 1024); 924 return sprintf(buf, "%luK\n", this_leaf->size / 1024);
888} 925}
@@ -906,17 +943,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
906 return n; 943 return n;
907} 944}
908 945
909static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf) 946static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
947 unsigned int cpu)
910{ 948{
911 return show_shared_cpu_map_func(leaf, 0, buf); 949 return show_shared_cpu_map_func(leaf, 0, buf);
912} 950}
913 951
914static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) 952static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
953 unsigned int cpu)
915{ 954{
916 return show_shared_cpu_map_func(leaf, 1, buf); 955 return show_shared_cpu_map_func(leaf, 1, buf);
917} 956}
918 957
919static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) 958static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
959 unsigned int cpu)
920{ 960{
921 switch (this_leaf->eax.split.type) { 961 switch (this_leaf->eax.split.type) {
922 case CACHE_TYPE_DATA: 962 case CACHE_TYPE_DATA:
@@ -974,6 +1014,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
974 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) 1014 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
975 n += 2; 1015 n += 2;
976 1016
1017 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1018 n += 1;
1019
977 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); 1020 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
978 if (attrs == NULL) 1021 if (attrs == NULL)
979 return attrs = default_attrs; 1022 return attrs = default_attrs;
@@ -986,6 +1029,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
986 attrs[n++] = &cache_disable_1.attr; 1029 attrs[n++] = &cache_disable_1.attr;
987 } 1030 }
988 1031
1032 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1033 attrs[n++] = &subcaches.attr;
1034
989 return attrs; 1035 return attrs;
990} 1036}
991#endif 1037#endif
@@ -998,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
998 1044
999 ret = fattr->show ? 1045 ret = fattr->show ?
1000 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 1046 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1001 buf) : 1047 buf, this_leaf->cpu) :
1002 0; 1048 0;
1003 return ret; 1049 return ret;
1004} 1050}
@@ -1012,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
1012 1058
1013 ret = fattr->store ? 1059 ret = fattr->store ?
1014 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 1060 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1015 buf, count) : 1061 buf, count, this_leaf->cpu) :
1016 0; 1062 0;
1017 return ret; 1063 return ret;
1018} 1064}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52aca..167f97b5596e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
527 int i, err = 0; 527 int i, err = 0;
528 struct threshold_bank *b = NULL; 528 struct threshold_bank *b = NULL;
529 char name[32]; 529 char name[32];
530#ifdef CONFIG_SMP
531 struct cpuinfo_x86 *c = &cpu_data(cpu);
532#endif
533 530
534 sprintf(name, "threshold_bank%i", bank); 531 sprintf(name, "threshold_bank%i", bank);
535 532
536#ifdef CONFIG_SMP 533#ifdef CONFIG_SMP
537 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 534 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
538 i = cpumask_first(c->llc_shared_map); 535 i = cpumask_first(cpu_llc_shared_mask(cpu));
539 536
540 /* first core not up yet */ 537 /* first core not up yet */
541 if (cpu_data(i).cpu_core_id) 538 if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
555 if (err) 552 if (err)
556 goto out; 553 goto out;
557 554
558 cpumask_copy(b->cpus, c->llc_shared_map); 555 cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
559 per_cpu(threshold_banks, cpu)[bank] = b; 556 per_cpu(threshold_banks, cpu)[bank] = b;
560 557
561 goto out; 558 goto out;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693..26604188aa49 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,7 @@
30#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h> 32#include <asm/compat.h>
33#include <asm/smp.h>
33 34
34#if 0 35#if 0
35#undef wrmsrl 36#undef wrmsrl
@@ -93,6 +94,8 @@ struct amd_nb {
93 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 94 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
94}; 95};
95 96
97struct intel_percore;
98
96#define MAX_LBR_ENTRIES 16 99#define MAX_LBR_ENTRIES 16
97 100
98struct cpu_hw_events { 101struct cpu_hw_events {
@@ -128,6 +131,13 @@ struct cpu_hw_events {
128 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 131 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
129 132
130 /* 133 /*
134 * Intel percore register state.
135 * Coordinate shared resources between HT threads.
136 */
137 int percore_used; /* Used by this CPU? */
138 struct intel_percore *per_core;
139
140 /*
131 * AMD specific bits 141 * AMD specific bits
132 */ 142 */
133 struct amd_nb *amd_nb; 143 struct amd_nb *amd_nb;
@@ -166,8 +176,10 @@ struct cpu_hw_events {
166/* 176/*
167 * Constraint on the Event code + UMask 177 * Constraint on the Event code + UMask
168 */ 178 */
169#define PEBS_EVENT_CONSTRAINT(c, n) \ 179#define INTEL_UEVENT_CONSTRAINT(c, n) \
170 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) 180 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
181#define PEBS_EVENT_CONSTRAINT(c, n) \
182 INTEL_UEVENT_CONSTRAINT(c, n)
171 183
172#define EVENT_CONSTRAINT_END \ 184#define EVENT_CONSTRAINT_END \
173 EVENT_CONSTRAINT(0, 0, 0) 185 EVENT_CONSTRAINT(0, 0, 0)
@@ -175,6 +187,28 @@ struct cpu_hw_events {
175#define for_each_event_constraint(e, c) \ 187#define for_each_event_constraint(e, c) \
176 for ((e) = (c); (e)->weight; (e)++) 188 for ((e) = (c); (e)->weight; (e)++)
177 189
190/*
191 * Extra registers for specific events.
192 * Some events need large masks and require external MSRs.
193 * Define a mapping to these extra registers.
194 */
195struct extra_reg {
196 unsigned int event;
197 unsigned int msr;
198 u64 config_mask;
199 u64 valid_mask;
200};
201
202#define EVENT_EXTRA_REG(e, ms, m, vm) { \
203 .event = (e), \
204 .msr = (ms), \
205 .config_mask = (m), \
206 .valid_mask = (vm), \
207 }
208#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
209 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
210#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
211
178union perf_capabilities { 212union perf_capabilities {
179 struct { 213 struct {
180 u64 lbr_format : 6; 214 u64 lbr_format : 6;
@@ -219,6 +253,7 @@ struct x86_pmu {
219 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 253 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
220 struct perf_event *event); 254 struct perf_event *event);
221 struct event_constraint *event_constraints; 255 struct event_constraint *event_constraints;
256 struct event_constraint *percore_constraints;
222 void (*quirks)(void); 257 void (*quirks)(void);
223 int perfctr_second_write; 258 int perfctr_second_write;
224 259
@@ -247,6 +282,11 @@ struct x86_pmu {
247 */ 282 */
248 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ 283 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
249 int lbr_nr; /* hardware stack size */ 284 int lbr_nr; /* hardware stack size */
285
286 /*
287 * Extra registers for events
288 */
289 struct extra_reg *extra_regs;
250}; 290};
251 291
252static struct x86_pmu x86_pmu __read_mostly; 292static struct x86_pmu x86_pmu __read_mostly;
@@ -271,6 +311,10 @@ static u64 __read_mostly hw_cache_event_ids
271 [PERF_COUNT_HW_CACHE_MAX] 311 [PERF_COUNT_HW_CACHE_MAX]
272 [PERF_COUNT_HW_CACHE_OP_MAX] 312 [PERF_COUNT_HW_CACHE_OP_MAX]
273 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 313 [PERF_COUNT_HW_CACHE_RESULT_MAX];
314static u64 __read_mostly hw_cache_extra_regs
315 [PERF_COUNT_HW_CACHE_MAX]
316 [PERF_COUNT_HW_CACHE_OP_MAX]
317 [PERF_COUNT_HW_CACHE_RESULT_MAX];
274 318
275/* 319/*
276 * Propagate event elapsed time into the generic event. 320 * Propagate event elapsed time into the generic event.
@@ -298,7 +342,7 @@ x86_perf_event_update(struct perf_event *event)
298 */ 342 */
299again: 343again:
300 prev_raw_count = local64_read(&hwc->prev_count); 344 prev_raw_count = local64_read(&hwc->prev_count);
301 rdmsrl(hwc->event_base + idx, new_raw_count); 345 rdmsrl(hwc->event_base, new_raw_count);
302 346
303 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 347 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
304 new_raw_count) != prev_raw_count) 348 new_raw_count) != prev_raw_count)
@@ -321,6 +365,49 @@ again:
321 return new_raw_count; 365 return new_raw_count;
322} 366}
323 367
368/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
369static inline int x86_pmu_addr_offset(int index)
370{
371 if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
372 return index << 1;
373 return index;
374}
375
376static inline unsigned int x86_pmu_config_addr(int index)
377{
378 return x86_pmu.eventsel + x86_pmu_addr_offset(index);
379}
380
381static inline unsigned int x86_pmu_event_addr(int index)
382{
383 return x86_pmu.perfctr + x86_pmu_addr_offset(index);
384}
385
386/*
387 * Find and validate any extra registers to set up.
388 */
389static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
390{
391 struct extra_reg *er;
392
393 event->hw.extra_reg = 0;
394 event->hw.extra_config = 0;
395
396 if (!x86_pmu.extra_regs)
397 return 0;
398
399 for (er = x86_pmu.extra_regs; er->msr; er++) {
400 if (er->event != (config & er->config_mask))
401 continue;
402 if (event->attr.config1 & ~er->valid_mask)
403 return -EINVAL;
404 event->hw.extra_reg = er->msr;
405 event->hw.extra_config = event->attr.config1;
406 break;
407 }
408 return 0;
409}
410
324static atomic_t active_events; 411static atomic_t active_events;
325static DEFINE_MUTEX(pmc_reserve_mutex); 412static DEFINE_MUTEX(pmc_reserve_mutex);
326 413
@@ -331,12 +418,12 @@ static bool reserve_pmc_hardware(void)
331 int i; 418 int i;
332 419
333 for (i = 0; i < x86_pmu.num_counters; i++) { 420 for (i = 0; i < x86_pmu.num_counters; i++) {
334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 421 if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
335 goto perfctr_fail; 422 goto perfctr_fail;
336 } 423 }
337 424
338 for (i = 0; i < x86_pmu.num_counters; i++) { 425 for (i = 0; i < x86_pmu.num_counters; i++) {
339 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 426 if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
340 goto eventsel_fail; 427 goto eventsel_fail;
341 } 428 }
342 429
@@ -344,13 +431,13 @@ static bool reserve_pmc_hardware(void)
344 431
345eventsel_fail: 432eventsel_fail:
346 for (i--; i >= 0; i--) 433 for (i--; i >= 0; i--)
347 release_evntsel_nmi(x86_pmu.eventsel + i); 434 release_evntsel_nmi(x86_pmu_config_addr(i));
348 435
349 i = x86_pmu.num_counters; 436 i = x86_pmu.num_counters;
350 437
351perfctr_fail: 438perfctr_fail:
352 for (i--; i >= 0; i--) 439 for (i--; i >= 0; i--)
353 release_perfctr_nmi(x86_pmu.perfctr + i); 440 release_perfctr_nmi(x86_pmu_event_addr(i));
354 441
355 return false; 442 return false;
356} 443}
@@ -360,8 +447,8 @@ static void release_pmc_hardware(void)
360 int i; 447 int i;
361 448
362 for (i = 0; i < x86_pmu.num_counters; i++) { 449 for (i = 0; i < x86_pmu.num_counters; i++) {
363 release_perfctr_nmi(x86_pmu.perfctr + i); 450 release_perfctr_nmi(x86_pmu_event_addr(i));
364 release_evntsel_nmi(x86_pmu.eventsel + i); 451 release_evntsel_nmi(x86_pmu_config_addr(i));
365 } 452 }
366} 453}
367 454
@@ -382,7 +469,7 @@ static bool check_hw_exists(void)
382 * complain and bail. 469 * complain and bail.
383 */ 470 */
384 for (i = 0; i < x86_pmu.num_counters; i++) { 471 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i; 472 reg = x86_pmu_config_addr(i);
386 ret = rdmsrl_safe(reg, &val); 473 ret = rdmsrl_safe(reg, &val);
387 if (ret) 474 if (ret)
388 goto msr_fail; 475 goto msr_fail;
@@ -407,8 +494,8 @@ static bool check_hw_exists(void)
407 * that don't trap on the MSR access and always return 0s. 494 * that don't trap on the MSR access and always return 0s.
408 */ 495 */
409 val = 0xabcdUL; 496 val = 0xabcdUL;
410 ret = checking_wrmsrl(x86_pmu.perfctr, val); 497 ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 498 ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
412 if (ret || val != val_new) 499 if (ret || val != val_new)
413 goto msr_fail; 500 goto msr_fail;
414 501
@@ -442,8 +529,9 @@ static inline int x86_pmu_initialized(void)
442} 529}
443 530
444static inline int 531static inline int
445set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) 532set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
446{ 533{
534 struct perf_event_attr *attr = &event->attr;
447 unsigned int cache_type, cache_op, cache_result; 535 unsigned int cache_type, cache_op, cache_result;
448 u64 config, val; 536 u64 config, val;
449 537
@@ -470,8 +558,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
470 return -EINVAL; 558 return -EINVAL;
471 559
472 hwc->config |= val; 560 hwc->config |= val;
473 561 attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
474 return 0; 562 return x86_pmu_extra_regs(val, event);
475} 563}
476 564
477static int x86_setup_perfctr(struct perf_event *event) 565static int x86_setup_perfctr(struct perf_event *event)
@@ -496,10 +584,10 @@ static int x86_setup_perfctr(struct perf_event *event)
496 } 584 }
497 585
498 if (attr->type == PERF_TYPE_RAW) 586 if (attr->type == PERF_TYPE_RAW)
499 return 0; 587 return x86_pmu_extra_regs(event->attr.config, event);
500 588
501 if (attr->type == PERF_TYPE_HW_CACHE) 589 if (attr->type == PERF_TYPE_HW_CACHE)
502 return set_ext_hw_attr(hwc, attr); 590 return set_ext_hw_attr(hwc, event);
503 591
504 if (attr->config >= x86_pmu.max_events) 592 if (attr->config >= x86_pmu.max_events)
505 return -EINVAL; 593 return -EINVAL;
@@ -617,11 +705,11 @@ static void x86_pmu_disable_all(void)
617 705
618 if (!test_bit(idx, cpuc->active_mask)) 706 if (!test_bit(idx, cpuc->active_mask))
619 continue; 707 continue;
620 rdmsrl(x86_pmu.eventsel + idx, val); 708 rdmsrl(x86_pmu_config_addr(idx), val);
621 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) 709 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
622 continue; 710 continue;
623 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 711 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
624 wrmsrl(x86_pmu.eventsel + idx, val); 712 wrmsrl(x86_pmu_config_addr(idx), val);
625 } 713 }
626} 714}
627 715
@@ -642,21 +730,26 @@ static void x86_pmu_disable(struct pmu *pmu)
642 x86_pmu.disable_all(); 730 x86_pmu.disable_all();
643} 731}
644 732
733static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
734 u64 enable_mask)
735{
736 if (hwc->extra_reg)
737 wrmsrl(hwc->extra_reg, hwc->extra_config);
738 wrmsrl(hwc->config_base, hwc->config | enable_mask);
739}
740
645static void x86_pmu_enable_all(int added) 741static void x86_pmu_enable_all(int added)
646{ 742{
647 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 743 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
648 int idx; 744 int idx;
649 745
650 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 746 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
651 struct perf_event *event = cpuc->events[idx]; 747 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
652 u64 val;
653 748
654 if (!test_bit(idx, cpuc->active_mask)) 749 if (!test_bit(idx, cpuc->active_mask))
655 continue; 750 continue;
656 751
657 val = event->hw.config; 752 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
658 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
659 wrmsrl(x86_pmu.eventsel + idx, val);
660 } 753 }
661} 754}
662 755
@@ -821,15 +914,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
821 hwc->event_base = 0; 914 hwc->event_base = 0;
822 } else if (hwc->idx >= X86_PMC_IDX_FIXED) { 915 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
823 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; 916 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
824 /* 917 hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
825 * We set it so that event_base + idx in wrmsr/rdmsr maps to
826 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
827 */
828 hwc->event_base =
829 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
830 } else { 918 } else {
831 hwc->config_base = x86_pmu.eventsel; 919 hwc->config_base = x86_pmu_config_addr(hwc->idx);
832 hwc->event_base = x86_pmu.perfctr; 920 hwc->event_base = x86_pmu_event_addr(hwc->idx);
833 } 921 }
834} 922}
835 923
@@ -915,17 +1003,11 @@ static void x86_pmu_enable(struct pmu *pmu)
915 x86_pmu.enable_all(added); 1003 x86_pmu.enable_all(added);
916} 1004}
917 1005
918static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
919 u64 enable_mask)
920{
921 wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
922}
923
924static inline void x86_pmu_disable_event(struct perf_event *event) 1006static inline void x86_pmu_disable_event(struct perf_event *event)
925{ 1007{
926 struct hw_perf_event *hwc = &event->hw; 1008 struct hw_perf_event *hwc = &event->hw;
927 1009
928 wrmsrl(hwc->config_base + hwc->idx, hwc->config); 1010 wrmsrl(hwc->config_base, hwc->config);
929} 1011}
930 1012
931static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 1013static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -978,7 +1060,7 @@ x86_perf_event_set_period(struct perf_event *event)
978 */ 1060 */
979 local64_set(&hwc->prev_count, (u64)-left); 1061 local64_set(&hwc->prev_count, (u64)-left);
980 1062
981 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); 1063 wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
982 1064
983 /* 1065 /*
984 * Due to erratum on certan cpu we need 1066 * Due to erratum on certan cpu we need
@@ -986,7 +1068,7 @@ x86_perf_event_set_period(struct perf_event *event)
986 * is updated properly 1068 * is updated properly
987 */ 1069 */
988 if (x86_pmu.perfctr_second_write) { 1070 if (x86_pmu.perfctr_second_write) {
989 wrmsrl(hwc->event_base + idx, 1071 wrmsrl(hwc->event_base,
990 (u64)(-left) & x86_pmu.cntval_mask); 1072 (u64)(-left) & x86_pmu.cntval_mask);
991 } 1073 }
992 1074
@@ -1113,8 +1195,8 @@ void perf_event_print_debug(void)
1113 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); 1195 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1114 1196
1115 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1197 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1116 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1198 rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
1117 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1199 rdmsrl(x86_pmu_event_addr(idx), pmc_count);
1118 1200
1119 prev_left = per_cpu(pmc_prev_left[idx], cpu); 1201 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1120 1202
@@ -1389,7 +1471,7 @@ static void __init pmu_check_apic(void)
1389 pr_info("no hardware sampling interrupt available.\n"); 1471 pr_info("no hardware sampling interrupt available.\n");
1390} 1472}
1391 1473
1392int __init init_hw_perf_events(void) 1474static int __init init_hw_perf_events(void)
1393{ 1475{
1394 struct event_constraint *c; 1476 struct event_constraint *c;
1395 int err; 1477 int err;
@@ -1608,7 +1690,7 @@ out:
1608 return ret; 1690 return ret;
1609} 1691}
1610 1692
1611int x86_pmu_event_init(struct perf_event *event) 1693static int x86_pmu_event_init(struct perf_event *event)
1612{ 1694{
1613 struct pmu *tmp; 1695 struct pmu *tmp;
1614 int err; 1696 int err;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202a6039..461f62bbd774 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
127/* 127/*
128 * AMD64 events are detected based on their event codes. 128 * AMD64 events are detected based on their event codes.
129 */ 129 */
130static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
131{
132 return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
133}
134
130static inline int amd_is_nb_event(struct hw_perf_event *hwc) 135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
131{ 136{
132 return (hwc->config & 0xe0) == 0xe0; 137 return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = {
385 .cpu_dead = amd_pmu_cpu_dead, 390 .cpu_dead = amd_pmu_cpu_dead,
386}; 391};
387 392
393/* AMD Family 15h */
394
395#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
396
397#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
398#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
399#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
400#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
401#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
402#define AMD_EVENT_EX_LS 0x000000C0ULL
403#define AMD_EVENT_DE 0x000000D0ULL
404#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
405
406/*
407 * AMD family 15h event code/PMC mappings:
408 *
409 * type = event_code & 0x0F0:
410 *
411 * 0x000 FP PERF_CTL[5:3]
412 * 0x010 FP PERF_CTL[5:3]
413 * 0x020 LS PERF_CTL[5:0]
414 * 0x030 LS PERF_CTL[5:0]
415 * 0x040 DC PERF_CTL[5:0]
416 * 0x050 DC PERF_CTL[5:0]
417 * 0x060 CU PERF_CTL[2:0]
418 * 0x070 CU PERF_CTL[2:0]
419 * 0x080 IC/DE PERF_CTL[2:0]
420 * 0x090 IC/DE PERF_CTL[2:0]
421 * 0x0A0 ---
422 * 0x0B0 ---
423 * 0x0C0 EX/LS PERF_CTL[5:0]
424 * 0x0D0 DE PERF_CTL[2:0]
425 * 0x0E0 NB NB_PERF_CTL[3:0]
426 * 0x0F0 NB NB_PERF_CTL[3:0]
427 *
428 * Exceptions:
429 *
430 * 0x003 FP PERF_CTL[3]
431 * 0x00B FP PERF_CTL[3]
432 * 0x00D FP PERF_CTL[3]
433 * 0x023 DE PERF_CTL[2:0]
434 * 0x02D LS PERF_CTL[3]
435 * 0x02E LS PERF_CTL[3,0]
436 * 0x043 CU PERF_CTL[2:0]
437 * 0x045 CU PERF_CTL[2:0]
438 * 0x046 CU PERF_CTL[2:0]
439 * 0x054 CU PERF_CTL[2:0]
440 * 0x055 CU PERF_CTL[2:0]
441 * 0x08F IC PERF_CTL[0]
442 * 0x187 DE PERF_CTL[0]
443 * 0x188 DE PERF_CTL[0]
444 * 0x0DB EX PERF_CTL[5:0]
445 * 0x0DC LS PERF_CTL[5:0]
446 * 0x0DD LS PERF_CTL[5:0]
447 * 0x0DE LS PERF_CTL[5:0]
448 * 0x0DF LS PERF_CTL[5:0]
449 * 0x1D6 EX PERF_CTL[5:0]
450 * 0x1D8 EX PERF_CTL[5:0]
451 */
452
453static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
454static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
455static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
456static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
457static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
458static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
459
460static struct event_constraint *
461amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
462{
463 unsigned int event_code = amd_get_event_code(&event->hw);
464
465 switch (event_code & AMD_EVENT_TYPE_MASK) {
466 case AMD_EVENT_FP:
467 switch (event_code) {
468 case 0x003:
469 case 0x00B:
470 case 0x00D:
471 return &amd_f15_PMC3;
472 default:
473 return &amd_f15_PMC53;
474 }
475 case AMD_EVENT_LS:
476 case AMD_EVENT_DC:
477 case AMD_EVENT_EX_LS:
478 switch (event_code) {
479 case 0x023:
480 case 0x043:
481 case 0x045:
482 case 0x046:
483 case 0x054:
484 case 0x055:
485 return &amd_f15_PMC20;
486 case 0x02D:
487 return &amd_f15_PMC3;
488 case 0x02E:
489 return &amd_f15_PMC30;
490 default:
491 return &amd_f15_PMC50;
492 }
493 case AMD_EVENT_CU:
494 case AMD_EVENT_IC_DE:
495 case AMD_EVENT_DE:
496 switch (event_code) {
497 case 0x08F:
498 case 0x187:
499 case 0x188:
500 return &amd_f15_PMC0;
501 case 0x0DB ... 0x0DF:
502 case 0x1D6:
503 case 0x1D8:
504 return &amd_f15_PMC50;
505 default:
506 return &amd_f15_PMC20;
507 }
508 case AMD_EVENT_NB:
509 /* not yet implemented */
510 return &emptyconstraint;
511 default:
512 return &emptyconstraint;
513 }
514}
515
516static __initconst const struct x86_pmu amd_pmu_f15h = {
517 .name = "AMD Family 15h",
518 .handle_irq = x86_pmu_handle_irq,
519 .disable_all = x86_pmu_disable_all,
520 .enable_all = x86_pmu_enable_all,
521 .enable = x86_pmu_enable_event,
522 .disable = x86_pmu_disable_event,
523 .hw_config = amd_pmu_hw_config,
524 .schedule_events = x86_schedule_events,
525 .eventsel = MSR_F15H_PERF_CTL,
526 .perfctr = MSR_F15H_PERF_CTR,
527 .event_map = amd_pmu_event_map,
528 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
529 .num_counters = 6,
530 .cntval_bits = 48,
531 .cntval_mask = (1ULL << 48) - 1,
532 .apic = 1,
533 /* use highest bit to detect overflow */
534 .max_period = (1ULL << 47) - 1,
535 .get_event_constraints = amd_get_event_constraints_f15h,
536 /* nortbridge counters not yet implemented: */
537#if 0
538 .put_event_constraints = amd_put_event_constraints,
539
540 .cpu_prepare = amd_pmu_cpu_prepare,
541 .cpu_starting = amd_pmu_cpu_starting,
542 .cpu_dead = amd_pmu_cpu_dead,
543#endif
544};
545
388static __init int amd_pmu_init(void) 546static __init int amd_pmu_init(void)
389{ 547{
390 /* Performance-monitoring supported from K7 and later: */ 548 /* Performance-monitoring supported from K7 and later: */
391 if (boot_cpu_data.x86 < 6) 549 if (boot_cpu_data.x86 < 6)
392 return -ENODEV; 550 return -ENODEV;
393 551
394 x86_pmu = amd_pmu; 552 /*
553 * If core performance counter extensions exists, it must be
554 * family 15h, otherwise fail. See x86_pmu_addr_offset().
555 */
556 switch (boot_cpu_data.x86) {
557 case 0x15:
558 if (!cpu_has_perfctr_core)
559 return -ENODEV;
560 x86_pmu = amd_pmu_f15h;
561 break;
562 default:
563 if (cpu_has_perfctr_core)
564 return -ENODEV;
565 x86_pmu = amd_pmu;
566 break;
567 }
395 568
396 /* Events are common for all AMDs */ 569 /* Events are common for all AMDs */
397 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 570 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c1d79c..8fc2b2cee1da 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#ifdef CONFIG_CPU_SUP_INTEL
2 2
3#define MAX_EXTRA_REGS 2
4
5/*
6 * Per register state.
7 */
8struct er_account {
9 int ref; /* reference count */
10 unsigned int extra_reg; /* extra MSR number */
11 u64 extra_config; /* extra MSR config */
12};
13
14/*
15 * Per core state
16 * This used to coordinate shared registers for HT threads.
17 */
18struct intel_percore {
19 raw_spinlock_t lock; /* protect structure */
20 struct er_account regs[MAX_EXTRA_REGS];
21 int refcnt; /* number of threads */
22 unsigned core_id;
23};
24
3/* 25/*
4 * Intel PerfMon, used on Core and later. 26 * Intel PerfMon, used on Core and later.
5 */ 27 */
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
64 EVENT_CONSTRAINT_END 86 EVENT_CONSTRAINT_END
65}; 87};
66 88
89static struct extra_reg intel_nehalem_extra_regs[] =
90{
91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
92 EVENT_EXTRA_END
93};
94
95static struct event_constraint intel_nehalem_percore_constraints[] =
96{
97 INTEL_EVENT_CONSTRAINT(0xb7, 0),
98 EVENT_CONSTRAINT_END
99};
100
67static struct event_constraint intel_westmere_event_constraints[] = 101static struct event_constraint intel_westmere_event_constraints[] =
68{ 102{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] =
76 EVENT_CONSTRAINT_END 110 EVENT_CONSTRAINT_END
77}; 111};
78 112
113static struct event_constraint intel_snb_event_constraints[] =
114{
115 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
117 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
118 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
119 INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
120 INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
121 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
122 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
123 EVENT_CONSTRAINT_END
124};
125
126static struct extra_reg intel_westmere_extra_regs[] =
127{
128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
130 EVENT_EXTRA_END
131};
132
133static struct event_constraint intel_westmere_percore_constraints[] =
134{
135 INTEL_EVENT_CONSTRAINT(0xb7, 0),
136 INTEL_EVENT_CONSTRAINT(0xbb, 0),
137 EVENT_CONSTRAINT_END
138};
139
79static struct event_constraint intel_gen_event_constraints[] = 140static struct event_constraint intel_gen_event_constraints[] =
80{ 141{
81 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 142 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event)
89 return intel_perfmon_event_map[hw_event]; 150 return intel_perfmon_event_map[hw_event];
90} 151}
91 152
153static __initconst const u64 snb_hw_cache_event_ids
154 [PERF_COUNT_HW_CACHE_MAX]
155 [PERF_COUNT_HW_CACHE_OP_MAX]
156 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
157{
158 [ C(L1D) ] = {
159 [ C(OP_READ) ] = {
160 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
161 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
162 },
163 [ C(OP_WRITE) ] = {
164 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
165 [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
166 },
167 [ C(OP_PREFETCH) ] = {
168 [ C(RESULT_ACCESS) ] = 0x0,
169 [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
170 },
171 },
172 [ C(L1I ) ] = {
173 [ C(OP_READ) ] = {
174 [ C(RESULT_ACCESS) ] = 0x0,
175 [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
176 },
177 [ C(OP_WRITE) ] = {
178 [ C(RESULT_ACCESS) ] = -1,
179 [ C(RESULT_MISS) ] = -1,
180 },
181 [ C(OP_PREFETCH) ] = {
182 [ C(RESULT_ACCESS) ] = 0x0,
183 [ C(RESULT_MISS) ] = 0x0,
184 },
185 },
186 [ C(LL ) ] = {
187 /*
188 * TBD: Need Off-core Response Performance Monitoring support
189 */
190 [ C(OP_READ) ] = {
191 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
192 [ C(RESULT_ACCESS) ] = 0x01b7,
193 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
194 [ C(RESULT_MISS) ] = 0x01bb,
195 },
196 [ C(OP_WRITE) ] = {
197 /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
198 [ C(RESULT_ACCESS) ] = 0x01b7,
199 /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
200 [ C(RESULT_MISS) ] = 0x01bb,
201 },
202 [ C(OP_PREFETCH) ] = {
203 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
204 [ C(RESULT_ACCESS) ] = 0x01b7,
205 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
206 [ C(RESULT_MISS) ] = 0x01bb,
207 },
208 },
209 [ C(DTLB) ] = {
210 [ C(OP_READ) ] = {
211 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
212 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
213 },
214 [ C(OP_WRITE) ] = {
215 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
216 [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
217 },
218 [ C(OP_PREFETCH) ] = {
219 [ C(RESULT_ACCESS) ] = 0x0,
220 [ C(RESULT_MISS) ] = 0x0,
221 },
222 },
223 [ C(ITLB) ] = {
224 [ C(OP_READ) ] = {
225 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
226 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
227 },
228 [ C(OP_WRITE) ] = {
229 [ C(RESULT_ACCESS) ] = -1,
230 [ C(RESULT_MISS) ] = -1,
231 },
232 [ C(OP_PREFETCH) ] = {
233 [ C(RESULT_ACCESS) ] = -1,
234 [ C(RESULT_MISS) ] = -1,
235 },
236 },
237 [ C(BPU ) ] = {
238 [ C(OP_READ) ] = {
239 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
240 [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
241 },
242 [ C(OP_WRITE) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 [ C(OP_PREFETCH) ] = {
247 [ C(RESULT_ACCESS) ] = -1,
248 [ C(RESULT_MISS) ] = -1,
249 },
250 },
251};
252
92static __initconst const u64 westmere_hw_cache_event_ids 253static __initconst const u64 westmere_hw_cache_event_ids
93 [PERF_COUNT_HW_CACHE_MAX] 254 [PERF_COUNT_HW_CACHE_MAX]
94 [PERF_COUNT_HW_CACHE_OP_MAX] 255 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
124 }, 285 },
125 [ C(LL ) ] = { 286 [ C(LL ) ] = {
126 [ C(OP_READ) ] = { 287 [ C(OP_READ) ] = {
127 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ 288 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
128 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ 289 [ C(RESULT_ACCESS) ] = 0x01b7,
290 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
291 [ C(RESULT_MISS) ] = 0x01bb,
129 }, 292 },
293 /*
294 * Use RFO, not WRITEBACK, because a write miss would typically occur
295 * on RFO.
296 */
130 [ C(OP_WRITE) ] = { 297 [ C(OP_WRITE) ] = {
131 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ 298 /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
132 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ 299 [ C(RESULT_ACCESS) ] = 0x01bb,
300 /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
301 [ C(RESULT_MISS) ] = 0x01b7,
133 }, 302 },
134 [ C(OP_PREFETCH) ] = { 303 [ C(OP_PREFETCH) ] = {
135 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ 304 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
136 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ 305 [ C(RESULT_ACCESS) ] = 0x01b7,
306 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
307 [ C(RESULT_MISS) ] = 0x01bb,
137 }, 308 },
138 }, 309 },
139 [ C(DTLB) ] = { 310 [ C(DTLB) ] = {
@@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
180 }, 351 },
181}; 352};
182 353
354/*
355 * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
356 */
357
358#define DMND_DATA_RD (1 << 0)
359#define DMND_RFO (1 << 1)
360#define DMND_WB (1 << 3)
361#define PF_DATA_RD (1 << 4)
362#define PF_DATA_RFO (1 << 5)
363#define RESP_UNCORE_HIT (1 << 8)
364#define RESP_MISS (0xf600) /* non uncore hit */
365
366static __initconst const u64 nehalem_hw_cache_extra_regs
367 [PERF_COUNT_HW_CACHE_MAX]
368 [PERF_COUNT_HW_CACHE_OP_MAX]
369 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
370{
371 [ C(LL ) ] = {
372 [ C(OP_READ) ] = {
373 [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
374 [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
375 },
376 [ C(OP_WRITE) ] = {
377 [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
378 [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
379 },
380 [ C(OP_PREFETCH) ] = {
381 [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
382 [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
383 },
384 }
385};
386
183static __initconst const u64 nehalem_hw_cache_event_ids 387static __initconst const u64 nehalem_hw_cache_event_ids
184 [PERF_COUNT_HW_CACHE_MAX] 388 [PERF_COUNT_HW_CACHE_MAX]
185 [PERF_COUNT_HW_CACHE_OP_MAX] 389 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
215 }, 419 },
216 [ C(LL ) ] = { 420 [ C(LL ) ] = {
217 [ C(OP_READ) ] = { 421 [ C(OP_READ) ] = {
218 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ 422 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
219 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ 423 [ C(RESULT_ACCESS) ] = 0x01b7,
424 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
425 [ C(RESULT_MISS) ] = 0x01b7,
220 }, 426 },
427 /*
428 * Use RFO, not WRITEBACK, because a write miss would typically occur
429 * on RFO.
430 */
221 [ C(OP_WRITE) ] = { 431 [ C(OP_WRITE) ] = {
222 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ 432 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
223 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ 433 [ C(RESULT_ACCESS) ] = 0x01b7,
434 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
435 [ C(RESULT_MISS) ] = 0x01b7,
224 }, 436 },
225 [ C(OP_PREFETCH) ] = { 437 [ C(OP_PREFETCH) ] = {
226 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ 438 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
227 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ 439 [ C(RESULT_ACCESS) ] = 0x01b7,
440 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
441 [ C(RESULT_MISS) ] = 0x01b7,
228 }, 442 },
229 }, 443 },
230 [ C(DTLB) ] = { 444 [ C(DTLB) ] = {
@@ -691,8 +905,8 @@ static void intel_pmu_reset(void)
691 printk("clearing PMU state on CPU#%d\n", smp_processor_id()); 905 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
692 906
693 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 907 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
694 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); 908 checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
695 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); 909 checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
696 } 910 }
697 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) 911 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
698 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 912 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event)
794} 1008}
795 1009
796static struct event_constraint * 1010static struct event_constraint *
1011intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1012{
1013 struct hw_perf_event *hwc = &event->hw;
1014 unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
1015 struct event_constraint *c;
1016 struct intel_percore *pc;
1017 struct er_account *era;
1018 int i;
1019 int free_slot;
1020 int found;
1021
1022 if (!x86_pmu.percore_constraints || hwc->extra_alloc)
1023 return NULL;
1024
1025 for (c = x86_pmu.percore_constraints; c->cmask; c++) {
1026 if (e != c->code)
1027 continue;
1028
1029 /*
1030 * Allocate resource per core.
1031 */
1032 pc = cpuc->per_core;
1033 if (!pc)
1034 break;
1035 c = &emptyconstraint;
1036 raw_spin_lock(&pc->lock);
1037 free_slot = -1;
1038 found = 0;
1039 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1040 era = &pc->regs[i];
1041 if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
1042 /* Allow sharing same config */
1043 if (hwc->extra_config == era->extra_config) {
1044 era->ref++;
1045 cpuc->percore_used = 1;
1046 hwc->extra_alloc = 1;
1047 c = NULL;
1048 }
1049 /* else conflict */
1050 found = 1;
1051 break;
1052 } else if (era->ref == 0 && free_slot == -1)
1053 free_slot = i;
1054 }
1055 if (!found && free_slot != -1) {
1056 era = &pc->regs[free_slot];
1057 era->ref = 1;
1058 era->extra_reg = hwc->extra_reg;
1059 era->extra_config = hwc->extra_config;
1060 cpuc->percore_used = 1;
1061 hwc->extra_alloc = 1;
1062 c = NULL;
1063 }
1064 raw_spin_unlock(&pc->lock);
1065 return c;
1066 }
1067
1068 return NULL;
1069}
1070
1071static struct event_constraint *
797intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 1072intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
798{ 1073{
799 struct event_constraint *c; 1074 struct event_constraint *c;
@@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
806 if (c) 1081 if (c)
807 return c; 1082 return c;
808 1083
1084 c = intel_percore_constraints(cpuc, event);
1085 if (c)
1086 return c;
1087
809 return x86_get_event_constraints(cpuc, event); 1088 return x86_get_event_constraints(cpuc, event);
810} 1089}
811 1090
1091static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1092 struct perf_event *event)
1093{
1094 struct extra_reg *er;
1095 struct intel_percore *pc;
1096 struct er_account *era;
1097 struct hw_perf_event *hwc = &event->hw;
1098 int i, allref;
1099
1100 if (!cpuc->percore_used)
1101 return;
1102
1103 for (er = x86_pmu.extra_regs; er->msr; er++) {
1104 if (er->event != (hwc->config & er->config_mask))
1105 continue;
1106
1107 pc = cpuc->per_core;
1108 raw_spin_lock(&pc->lock);
1109 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1110 era = &pc->regs[i];
1111 if (era->ref > 0 &&
1112 era->extra_config == hwc->extra_config &&
1113 era->extra_reg == er->msr) {
1114 era->ref--;
1115 hwc->extra_alloc = 0;
1116 break;
1117 }
1118 }
1119 allref = 0;
1120 for (i = 0; i < MAX_EXTRA_REGS; i++)
1121 allref += pc->regs[i].ref;
1122 if (allref == 0)
1123 cpuc->percore_used = 0;
1124 raw_spin_unlock(&pc->lock);
1125 break;
1126 }
1127}
1128
812static int intel_pmu_hw_config(struct perf_event *event) 1129static int intel_pmu_hw_config(struct perf_event *event)
813{ 1130{
814 int ret = x86_pmu_hw_config(event); 1131 int ret = x86_pmu_hw_config(event);
@@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = {
880 */ 1197 */
881 .max_period = (1ULL << 31) - 1, 1198 .max_period = (1ULL << 31) - 1,
882 .get_event_constraints = intel_get_event_constraints, 1199 .get_event_constraints = intel_get_event_constraints,
1200 .put_event_constraints = intel_put_event_constraints,
883 .event_constraints = intel_core_event_constraints, 1201 .event_constraints = intel_core_event_constraints,
884}; 1202};
885 1203
1204static int intel_pmu_cpu_prepare(int cpu)
1205{
1206 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1207
1208 if (!cpu_has_ht_siblings())
1209 return NOTIFY_OK;
1210
1211 cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
1212 GFP_KERNEL, cpu_to_node(cpu));
1213 if (!cpuc->per_core)
1214 return NOTIFY_BAD;
1215
1216 raw_spin_lock_init(&cpuc->per_core->lock);
1217 cpuc->per_core->core_id = -1;
1218 return NOTIFY_OK;
1219}
1220
886static void intel_pmu_cpu_starting(int cpu) 1221static void intel_pmu_cpu_starting(int cpu)
887{ 1222{
1223 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1224 int core_id = topology_core_id(cpu);
1225 int i;
1226
888 init_debug_store_on_cpu(cpu); 1227 init_debug_store_on_cpu(cpu);
889 /* 1228 /*
890 * Deal with CPUs that don't clear their LBRs on power-up. 1229 * Deal with CPUs that don't clear their LBRs on power-up.
891 */ 1230 */
892 intel_pmu_lbr_reset(); 1231 intel_pmu_lbr_reset();
1232
1233 if (!cpu_has_ht_siblings())
1234 return;
1235
1236 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1237 struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
1238
1239 if (pc && pc->core_id == core_id) {
1240 kfree(cpuc->per_core);
1241 cpuc->per_core = pc;
1242 break;
1243 }
1244 }
1245
1246 cpuc->per_core->core_id = core_id;
1247 cpuc->per_core->refcnt++;
893} 1248}
894 1249
895static void intel_pmu_cpu_dying(int cpu) 1250static void intel_pmu_cpu_dying(int cpu)
896{ 1251{
1252 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1253 struct intel_percore *pc = cpuc->per_core;
1254
1255 if (pc) {
1256 if (pc->core_id == -1 || --pc->refcnt == 0)
1257 kfree(pc);
1258 cpuc->per_core = NULL;
1259 }
1260
897 fini_debug_store_on_cpu(cpu); 1261 fini_debug_store_on_cpu(cpu);
898} 1262}
899 1263
@@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = {
918 */ 1282 */
919 .max_period = (1ULL << 31) - 1, 1283 .max_period = (1ULL << 31) - 1,
920 .get_event_constraints = intel_get_event_constraints, 1284 .get_event_constraints = intel_get_event_constraints,
1285 .put_event_constraints = intel_put_event_constraints,
921 1286
1287 .cpu_prepare = intel_pmu_cpu_prepare,
922 .cpu_starting = intel_pmu_cpu_starting, 1288 .cpu_starting = intel_pmu_cpu_starting,
923 .cpu_dying = intel_pmu_cpu_dying, 1289 .cpu_dying = intel_pmu_cpu_dying,
924}; 1290};
@@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void)
1024 intel_pmu_lbr_init_core(); 1390 intel_pmu_lbr_init_core();
1025 1391
1026 x86_pmu.event_constraints = intel_core2_event_constraints; 1392 x86_pmu.event_constraints = intel_core2_event_constraints;
1393 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
1027 pr_cont("Core2 events, "); 1394 pr_cont("Core2 events, ");
1028 break; 1395 break;
1029 1396
@@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void)
1032 case 46: /* 45 nm nehalem-ex, "Beckton" */ 1399 case 46: /* 45 nm nehalem-ex, "Beckton" */
1033 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 1400 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1034 sizeof(hw_cache_event_ids)); 1401 sizeof(hw_cache_event_ids));
1402 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1403 sizeof(hw_cache_extra_regs));
1035 1404
1036 intel_pmu_lbr_init_nhm(); 1405 intel_pmu_lbr_init_nhm();
1037 1406
1038 x86_pmu.event_constraints = intel_nehalem_event_constraints; 1407 x86_pmu.event_constraints = intel_nehalem_event_constraints;
1408 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
1409 x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
1039 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1410 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1411 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1040 pr_cont("Nehalem events, "); 1412 pr_cont("Nehalem events, ");
1041 break; 1413 break;
1042 1414
@@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void)
1047 intel_pmu_lbr_init_atom(); 1419 intel_pmu_lbr_init_atom();
1048 1420
1049 x86_pmu.event_constraints = intel_gen_event_constraints; 1421 x86_pmu.event_constraints = intel_gen_event_constraints;
1422 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
1050 pr_cont("Atom events, "); 1423 pr_cont("Atom events, ");
1051 break; 1424 break;
1052 1425
@@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void)
1054 case 44: /* 32 nm nehalem, "Gulftown" */ 1427 case 44: /* 32 nm nehalem, "Gulftown" */
1055 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 1428 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
1056 sizeof(hw_cache_event_ids)); 1429 sizeof(hw_cache_event_ids));
1430 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1431 sizeof(hw_cache_extra_regs));
1057 1432
1058 intel_pmu_lbr_init_nhm(); 1433 intel_pmu_lbr_init_nhm();
1059 1434
1060 x86_pmu.event_constraints = intel_westmere_event_constraints; 1435 x86_pmu.event_constraints = intel_westmere_event_constraints;
1436 x86_pmu.percore_constraints = intel_westmere_percore_constraints;
1061 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1437 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1438 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
1439 x86_pmu.extra_regs = intel_westmere_extra_regs;
1062 pr_cont("Westmere events, "); 1440 pr_cont("Westmere events, ");
1063 break; 1441 break;
1064 1442
1443 case 42: /* SandyBridge */
1444 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1445 sizeof(hw_cache_event_ids));
1446
1447 intel_pmu_lbr_init_nhm();
1448
1449 x86_pmu.event_constraints = intel_snb_event_constraints;
1450 x86_pmu.pebs_constraints = intel_snb_pebs_events;
1451 pr_cont("SandyBridge events, ");
1452 break;
1453
1065 default: 1454 default:
1066 /* 1455 /*
1067 * default constraints for v2 and up 1456 * default constraints for v2 and up
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index b7dcd9f2b8a0..b95c66ae4a2a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -361,30 +361,88 @@ static int intel_pmu_drain_bts_buffer(void)
361/* 361/*
362 * PEBS 362 * PEBS
363 */ 363 */
364 364static struct event_constraint intel_core2_pebs_event_constraints[] = {
365static struct event_constraint intel_core_pebs_events[] = { 365 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
366 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
367 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ 366 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
368 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ 367 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
369 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ 368 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
370 PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ 369 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
371 PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ 370 EVENT_CONSTRAINT_END
372 PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ 371};
373 PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ 372
374 PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ 373static struct event_constraint intel_atom_pebs_event_constraints[] = {
374 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
375 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
376 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
375 EVENT_CONSTRAINT_END 377 EVENT_CONSTRAINT_END
376}; 378};
377 379
378static struct event_constraint intel_nehalem_pebs_events[] = { 380static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
379 PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ 381 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
380 PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ 382 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
381 PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ 383 PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
382 PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ 384 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
383 PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ 385 INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
384 PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ 386 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
385 PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ 387 PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
386 PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ 388 INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
387 PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ 389 PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
390 INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
391 INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
392 EVENT_CONSTRAINT_END
393};
394
395static struct event_constraint intel_westmere_pebs_event_constraints[] = {
396 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
397 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
398 PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
399 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
400 INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
401
402 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
403 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
404 INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
405 PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
406 INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
407 INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
408 EVENT_CONSTRAINT_END
409};
410
411static struct event_constraint intel_snb_pebs_events[] = {
412 PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
413 PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
414 PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
415 PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
416 PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
417 PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
418 PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
419 PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
420 PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
421 PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
422 PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
423 PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
424 PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
425 PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
426 PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
427 PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
428 PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
429 PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
430 PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
431 PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
432 PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
433 PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
434 PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
435 PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
436 PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
437 PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
438 PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
439 PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */
440 PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
441 PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
442 PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
443 PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */
444 PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */
445 PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
388 EVENT_CONSTRAINT_END 446 EVENT_CONSTRAINT_END
389}; 447};
390 448
@@ -695,20 +753,17 @@ static void intel_ds_init(void)
695 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); 753 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
696 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); 754 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
697 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; 755 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
698 x86_pmu.pebs_constraints = intel_core_pebs_events;
699 break; 756 break;
700 757
701 case 1: 758 case 1:
702 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); 759 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
703 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); 760 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
704 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; 761 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
705 x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
706 break; 762 break;
707 763
708 default: 764 default:
709 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); 765 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
710 x86_pmu.pebs = 0; 766 x86_pmu.pebs = 0;
711 break;
712 } 767 }
713 } 768 }
714} 769}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f7a0993c1e7c..3769ac822f96 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -764,15 +764,20 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
764 u64 v; 764 u64 v;
765 765
766 /* an official way for overflow indication */ 766 /* an official way for overflow indication */
767 rdmsrl(hwc->config_base + hwc->idx, v); 767 rdmsrl(hwc->config_base, v);
768 if (v & P4_CCCR_OVF) { 768 if (v & P4_CCCR_OVF) {
769 wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); 769 wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
770 return 1; 770 return 1;
771 } 771 }
772 772
773 /* it might be unflagged overflow */ 773 /*
774 rdmsrl(hwc->event_base + hwc->idx, v); 774 * In some circumstances the overflow might issue an NMI but did
775 if (!(v & ARCH_P4_CNTRVAL_MASK)) 775 * not set P4_CCCR_OVF bit. Because a counter holds a negative value
776 * we simply check for high bit being set, if it's cleared it means
777 * the counter has reached zero value and continued counting before
778 * real NMI signal was received:
779 */
780 if (!(v & ARCH_P4_UNFLAGGED_BIT))
776 return 1; 781 return 1;
777 782
778 return 0; 783 return 0;
@@ -810,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
810 * state we need to clear P4_CCCR_OVF, otherwise interrupt get 815 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
811 * asserted again and again 816 * asserted again and again
812 */ 817 */
813 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 818 (void)checking_wrmsrl(hwc->config_base,
814 (u64)(p4_config_unpack_cccr(hwc->config)) & 819 (u64)(p4_config_unpack_cccr(hwc->config)) &
815 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); 820 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
816} 821}
@@ -880,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
880 p4_pmu_enable_pebs(hwc->config); 885 p4_pmu_enable_pebs(hwc->config);
881 886
882 (void)checking_wrmsrl(escr_addr, escr_conf); 887 (void)checking_wrmsrl(escr_addr, escr_conf);
883 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 888 (void)checking_wrmsrl(hwc->config_base,
884 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); 889 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
885} 890}
886 891
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07be2cda..20c097e33860 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
68 if (cpuc->enabled) 68 if (cpuc->enabled)
69 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 69 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
70 70
71 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 71 (void)checking_wrmsrl(hwc->config_base, val);
72} 72}
73 73
74static void p6_pmu_enable_event(struct perf_event *event) 74static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
81 if (cpuc->enabled) 81 if (cpuc->enabled)
82 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 82 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
83 83
84 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 84 (void)checking_wrmsrl(hwc->config_base, val);
85} 85}
86 86
87static __initconst const struct x86_pmu p6_pmu = { 87static __initconst const struct x86_pmu p6_pmu = {
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a236615501..966512b2cacf 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
46 /* returns the bit offset of the performance counter register */ 46 /* returns the bit offset of the performance counter register */
47 switch (boot_cpu_data.x86_vendor) { 47 switch (boot_cpu_data.x86_vendor) {
48 case X86_VENDOR_AMD: 48 case X86_VENDOR_AMD:
49 if (msr >= MSR_F15H_PERF_CTR)
50 return (msr - MSR_F15H_PERF_CTR) >> 1;
49 return msr - MSR_K7_PERFCTR0; 51 return msr - MSR_K7_PERFCTR0;
50 case X86_VENDOR_INTEL: 52 case X86_VENDOR_INTEL:
51 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 53 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
70 /* returns the bit offset of the event selection register */ 72 /* returns the bit offset of the event selection register */
71 switch (boot_cpu_data.x86_vendor) { 73 switch (boot_cpu_data.x86_vendor) {
72 case X86_VENDOR_AMD: 74 case X86_VENDOR_AMD:
75 if (msr >= MSR_F15H_PERF_CTL)
76 return (msr - MSR_F15H_PERF_CTL) >> 1;
73 return msr - MSR_K7_EVNTSEL0; 77 return msr - MSR_K7_EVNTSEL0;
74 case X86_VENDOR_INTEL: 78 case X86_VENDOR_INTEL:
75 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 79 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1b..220a1c11cfde 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err)
320 oops_end(flags, regs, sig); 320 oops_end(flags, regs, sig);
321} 321}
322 322
323void notrace __kprobes
324die_nmi(char *str, struct pt_regs *regs, int do_panic)
325{
326 unsigned long flags;
327
328 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
329 return;
330
331 /*
332 * We are in trouble anyway, lets at least try
333 * to get a message out.
334 */
335 flags = oops_begin();
336 printk(KERN_EMERG "%s", str);
337 printk(" on CPU%d, ip %08lx, registers:\n",
338 smp_processor_id(), regs->ip);
339 show_registers(regs);
340 oops_end(flags, regs, 0);
341 if (do_panic || panic_on_oops)
342 panic("Non maskable interrupt");
343 nmi_exit();
344 local_irq_enable();
345 do_exit(SIGBUS);
346}
347
348static int __init oops_setup(char *s) 323static int __init oops_setup(char *s)
349{ 324{
350 if (!s) 325 if (!s)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 5fad62684651..cdf5bfd9d4d5 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -841,15 +841,21 @@ static int __init parse_memopt(char *p)
841 if (!p) 841 if (!p)
842 return -EINVAL; 842 return -EINVAL;
843 843
844#ifdef CONFIG_X86_32
845 if (!strcmp(p, "nopentium")) { 844 if (!strcmp(p, "nopentium")) {
845#ifdef CONFIG_X86_32
846 setup_clear_cpu_cap(X86_FEATURE_PSE); 846 setup_clear_cpu_cap(X86_FEATURE_PSE);
847 return 0; 847 return 0;
848 } 848#else
849 printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
850 return -EINVAL;
849#endif 851#endif
852 }
850 853
851 userdef = 1; 854 userdef = 1;
852 mem_size = memparse(p, &p); 855 mem_size = memparse(p, &p);
856 /* don't remove all of memory when handling "mem={invalid}" param */
857 if (mem_size == 0)
858 return -EINVAL;
853 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); 859 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
854 860
855 return 0; 861 return 0;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd953dee..9efbdcc56425 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -143,15 +143,10 @@ static void __init ati_bugs(int num, int slot, int func)
143 143
144static u32 __init ati_sbx00_rev(int num, int slot, int func) 144static u32 __init ati_sbx00_rev(int num, int slot, int func)
145{ 145{
146 u32 old, d; 146 u32 d;
147 147
148 d = read_pci_config(num, slot, func, 0x70);
149 old = d;
150 d &= ~(1<<8);
151 write_pci_config(num, slot, func, 0x70, d);
152 d = read_pci_config(num, slot, func, 0x8); 148 d = read_pci_config(num, slot, func, 0x8);
153 d &= 0xff; 149 d &= 0xff;
154 write_pci_config(num, slot, func, 0x70, old);
155 150
156 return d; 151 return d;
157} 152}
@@ -160,13 +155,16 @@ static void __init ati_bugs_contd(int num, int slot, int func)
160{ 155{
161 u32 d, rev; 156 u32 d, rev;
162 157
163 if (acpi_use_timer_override)
164 return;
165
166 rev = ati_sbx00_rev(num, slot, func); 158 rev = ati_sbx00_rev(num, slot, func);
159 if (rev >= 0x40)
160 acpi_fix_pin2_polarity = 1;
161
167 if (rev > 0x13) 162 if (rev > 0x13)
168 return; 163 return;
169 164
165 if (acpi_use_timer_override)
166 return;
167
170 /* check for IRQ0 interrupt swap */ 168 /* check for IRQ0 interrupt swap */
171 d = read_pci_config(num, slot, func, 0x64); 169 d = read_pci_config(num, slot, func, 0x64);
172 if (!(d & (1<<14))) 170 if (!(d & (1<<14)))
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c8b4efad7ebb..fa41f7298c84 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -65,6 +65,8 @@
65#define sysexit_audit syscall_exit_work 65#define sysexit_audit syscall_exit_work
66#endif 66#endif
67 67
68 .section .entry.text, "ax"
69
68/* 70/*
69 * We use macros for low-level operations which need to be overridden 71 * We use macros for low-level operations which need to be overridden
70 * for paravirtualization. The following will never clobber any registers: 72 * for paravirtualization. The following will never clobber any registers:
@@ -395,7 +397,7 @@ sysenter_past_esp:
395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 397 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
396 * pushed above; +8 corresponds to copy_thread's esp0 setting. 398 * pushed above; +8 corresponds to copy_thread's esp0 setting.
397 */ 399 */
398 pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) 400 pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
399 CFI_REL_OFFSET eip, 0 401 CFI_REL_OFFSET eip, 0
400 402
401 pushl_cfi %eax 403 pushl_cfi %eax
@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
788 */ 790 */
789.section .init.rodata,"a" 791.section .init.rodata,"a"
790ENTRY(interrupt) 792ENTRY(interrupt)
791.text 793.section .entry.text, "ax"
792 .p2align 5 794 .p2align 5
793 .p2align CONFIG_X86_L1_CACHE_SHIFT 795 .p2align CONFIG_X86_L1_CACHE_SHIFT
794ENTRY(irq_entries_start) 796ENTRY(irq_entries_start)
@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
807 .endif 809 .endif
808 .previous 810 .previous
809 .long 1b 811 .long 1b
810 .text 812 .section .entry.text, "ax"
811vector=vector+1 813vector=vector+1
812 .endif 814 .endif
813 .endr 815 .endr
@@ -1409,8 +1411,7 @@ END(general_protection)
1409#ifdef CONFIG_KVM_GUEST 1411#ifdef CONFIG_KVM_GUEST
1410ENTRY(async_page_fault) 1412ENTRY(async_page_fault)
1411 RING0_EC_FRAME 1413 RING0_EC_FRAME
1412 pushl $do_async_page_fault 1414 pushl_cfi $do_async_page_fault
1413 CFI_ADJUST_CFA_OFFSET 4
1414 jmp error_code 1415 jmp error_code
1415 CFI_ENDPROC 1416 CFI_ENDPROC
1416END(apf_page_fault) 1417END(apf_page_fault)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c9..b72b4a6466a9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -61,6 +61,8 @@
61#define __AUDIT_ARCH_LE 0x40000000 61#define __AUDIT_ARCH_LE 0x40000000
62 62
63 .code64 63 .code64
64 .section .entry.text, "ax"
65
64#ifdef CONFIG_FUNCTION_TRACER 66#ifdef CONFIG_FUNCTION_TRACER
65#ifdef CONFIG_DYNAMIC_FTRACE 67#ifdef CONFIG_DYNAMIC_FTRACE
66ENTRY(mcount) 68ENTRY(mcount)
@@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
744 */ 746 */
745 .section .init.rodata,"a" 747 .section .init.rodata,"a"
746ENTRY(interrupt) 748ENTRY(interrupt)
747 .text 749 .section .entry.text
748 .p2align 5 750 .p2align 5
749 .p2align CONFIG_X86_L1_CACHE_SHIFT 751 .p2align CONFIG_X86_L1_CACHE_SHIFT
750ENTRY(irq_entries_start) 752ENTRY(irq_entries_start)
@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
763 .endif 765 .endif
764 .previous 766 .previous
765 .quad 1b 767 .quad 1b
766 .text 768 .section .entry.text
767vector=vector+1 769vector=vector+1
768 .endif 770 .endif
769 .endr 771 .endr
@@ -975,9 +977,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
975 x86_platform_ipi smp_x86_platform_ipi 977 x86_platform_ipi smp_x86_platform_ipi
976 978
977#ifdef CONFIG_SMP 979#ifdef CONFIG_SMP
978.irpc idx, "01234567" 980.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
981 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
982.if NUM_INVALIDATE_TLB_VECTORS > \idx
979apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ 983apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
980 invalidate_interrupt\idx smp_invalidate_interrupt 984 invalidate_interrupt\idx smp_invalidate_interrupt
985.endif
981.endr 986.endr
982#endif 987#endif
983 988
@@ -1248,7 +1253,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1248 decl PER_CPU_VAR(irq_count) 1253 decl PER_CPU_VAR(irq_count)
1249 jmp error_exit 1254 jmp error_exit
1250 CFI_ENDPROC 1255 CFI_ENDPROC
1251END(do_hypervisor_callback) 1256END(xen_do_hypervisor_callback)
1252 1257
1253/* 1258/*
1254 * Hypervisor uses this for application faults while it executes. 1259 * Hypervisor uses this for application faults while it executes.
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 382eb2936d4d..a93742a57468 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
437 return; 437 return;
438 } 438 }
439 439
440 if (ftrace_push_return_trace(old, self_addr, &trace.depth,
441 frame_pointer) == -EBUSY) {
442 *parent = old;
443 return;
444 }
445
446 trace.func = self_addr; 440 trace.func = self_addr;
441 trace.depth = current->curr_ret_stack + 1;
447 442
448 /* Only trace if the calling function expects to */ 443 /* Only trace if the calling function expects to */
449 if (!ftrace_graph_entry(&trace)) { 444 if (!ftrace_graph_entry(&trace)) {
450 current->curr_ret_stack--;
451 *parent = old; 445 *parent = old;
446 return;
447 }
448
449 if (ftrace_push_return_trace(old, self_addr, &trace.depth,
450 frame_pointer) == -EBUSY) {
451 *parent = old;
452 return;
452 } 453 }
453} 454}
454#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 455#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index d8cc18a83260..ce0be7cd085e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -73,7 +73,7 @@ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
73 */ 73 */
74KERNEL_PAGES = LOWMEM_PAGES 74KERNEL_PAGES = LOWMEM_PAGES
75 75
76INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm 76INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
77RESERVE_BRK(pagetables, INIT_MAP_SIZE) 77RESERVE_BRK(pagetables, INIT_MAP_SIZE)
78 78
79/* 79/*
@@ -623,7 +623,7 @@ ENTRY(initial_code)
623 * BSS section 623 * BSS section
624 */ 624 */
625__PAGE_ALIGNED_BSS 625__PAGE_ALIGNED_BSS
626 .align PAGE_SIZE_asm 626 .align PAGE_SIZE
627#ifdef CONFIG_X86_PAE 627#ifdef CONFIG_X86_PAE
628initial_pg_pmd: 628initial_pg_pmd:
629 .fill 1024*KPMDS,4,0 629 .fill 1024*KPMDS,4,0
@@ -644,7 +644,7 @@ ENTRY(swapper_pg_dir)
644#ifdef CONFIG_X86_PAE 644#ifdef CONFIG_X86_PAE
645__PAGE_ALIGNED_DATA 645__PAGE_ALIGNED_DATA
646 /* Page-aligned for the benefit of paravirt? */ 646 /* Page-aligned for the benefit of paravirt? */
647 .align PAGE_SIZE_asm 647 .align PAGE_SIZE
648ENTRY(initial_page_table) 648ENTRY(initial_page_table)
649 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ 649 .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
650# if KPMDS == 3 650# if KPMDS == 3
@@ -662,7 +662,7 @@ ENTRY(initial_page_table)
662# else 662# else
663# error "Kernel PMDs should be 1, 2 or 3" 663# error "Kernel PMDs should be 1, 2 or 3"
664# endif 664# endif
665 .align PAGE_SIZE_asm /* needs to be page-sized too */ 665 .align PAGE_SIZE /* needs to be page-sized too */
666#endif 666#endif
667 667
668.data 668.data
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 4ff5968f12d2..bfe8f729e086 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -503,7 +503,7 @@ static int hpet_assign_irq(struct hpet_dev *dev)
503 if (!irq) 503 if (!irq)
504 return -EINVAL; 504 return -EINVAL;
505 505
506 set_irq_data(irq, dev); 506 irq_set_handler_data(irq, dev);
507 507
508 if (hpet_setup_msi_irq(irq)) 508 if (hpet_setup_msi_irq(irq))
509 return -EINVAL; 509 return -EINVAL;
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 20757cb2efa3..d9ca749c123b 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -112,7 +112,7 @@ static void make_8259A_irq(unsigned int irq)
112{ 112{
113 disable_irq_nosync(irq); 113 disable_irq_nosync(irq);
114 io_apic_irqs &= ~(1<<irq); 114 io_apic_irqs &= ~(1<<irq);
115 set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, 115 irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
116 i8259A_chip.name); 116 i8259A_chip.name);
117 enable_irq(irq); 117 enable_irq(irq);
118} 118}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8eec0ec59af2..8c968974253d 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,22 +14,9 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/thread_info.h> 15#include <linux/thread_info.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/bitmap.h>
17#include <asm/syscalls.h> 18#include <asm/syscalls.h>
18 19
19/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
20static void set_bitmap(unsigned long *bitmap, unsigned int base,
21 unsigned int extent, int new_value)
22{
23 unsigned int i;
24
25 for (i = base; i < base + extent; i++) {
26 if (new_value)
27 __set_bit(i, bitmap);
28 else
29 __clear_bit(i, bitmap);
30 }
31}
32
33/* 20/*
34 * this changes the io permissions bitmap in the current task. 21 * this changes the io permissions bitmap in the current task.
35 */ 22 */
@@ -69,7 +56,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
69 */ 56 */
70 tss = &per_cpu(init_tss, get_cpu()); 57 tss = &per_cpu(init_tss, get_cpu());
71 58
72 set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); 59 if (turn_on)
60 bitmap_clear(t->io_bitmap_ptr, from, num);
61 else
62 bitmap_set(t->io_bitmap_ptr, from, num);
73 63
74 /* 64 /*
75 * Search for a (possibly new) maximum. This is simple and stupid, 65 * Search for a (possibly new) maximum. This is simple and stupid,
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 753136003af1..948a31eae75f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -44,9 +44,9 @@ void ack_bad_irq(unsigned int irq)
44 44
45#define irq_stats(x) (&per_cpu(irq_stat, x)) 45#define irq_stats(x) (&per_cpu(irq_stat, x))
46/* 46/*
47 * /proc/interrupts printing: 47 * /proc/interrupts printing for arch specific interrupts
48 */ 48 */
49static int show_other_interrupts(struct seq_file *p, int prec) 49int arch_show_interrupts(struct seq_file *p, int prec)
50{ 50{
51 int j; 51 int j;
52 52
@@ -122,59 +122,6 @@ static int show_other_interrupts(struct seq_file *p, int prec)
122 return 0; 122 return 0;
123} 123}
124 124
125int show_interrupts(struct seq_file *p, void *v)
126{
127 unsigned long flags, any_count = 0;
128 int i = *(loff_t *) v, j, prec;
129 struct irqaction *action;
130 struct irq_desc *desc;
131
132 if (i > nr_irqs)
133 return 0;
134
135 for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
136 j *= 10;
137
138 if (i == nr_irqs)
139 return show_other_interrupts(p, prec);
140
141 /* print header */
142 if (i == 0) {
143 seq_printf(p, "%*s", prec + 8, "");
144 for_each_online_cpu(j)
145 seq_printf(p, "CPU%-8d", j);
146 seq_putc(p, '\n');
147 }
148
149 desc = irq_to_desc(i);
150 if (!desc)
151 return 0;
152
153 raw_spin_lock_irqsave(&desc->lock, flags);
154 for_each_online_cpu(j)
155 any_count |= kstat_irqs_cpu(i, j);
156 action = desc->action;
157 if (!action && !any_count)
158 goto out;
159
160 seq_printf(p, "%*d: ", prec, i);
161 for_each_online_cpu(j)
162 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
163 seq_printf(p, " %8s", desc->irq_data.chip->name);
164 seq_printf(p, "-%-8s", desc->name);
165
166 if (action) {
167 seq_printf(p, " %s", action->name);
168 while ((action = action->next) != NULL)
169 seq_printf(p, ", %s", action->name);
170 }
171
172 seq_putc(p, '\n');
173out:
174 raw_spin_unlock_irqrestore(&desc->lock, flags);
175 return 0;
176}
177
178/* 125/*
179 * /proc/stat helpers 126 * /proc/stat helpers
180 */ 127 */
@@ -284,6 +231,7 @@ void fixup_irqs(void)
284 static int warned; 231 static int warned;
285 struct irq_desc *desc; 232 struct irq_desc *desc;
286 struct irq_data *data; 233 struct irq_data *data;
234 struct irq_chip *chip;
287 235
288 for_each_irq_desc(irq, desc) { 236 for_each_irq_desc(irq, desc) {
289 int break_affinity = 0; 237 int break_affinity = 0;
@@ -298,10 +246,10 @@ void fixup_irqs(void)
298 /* interrupt's are disabled at this point */ 246 /* interrupt's are disabled at this point */
299 raw_spin_lock(&desc->lock); 247 raw_spin_lock(&desc->lock);
300 248
301 data = &desc->irq_data; 249 data = irq_desc_get_irq_data(desc);
302 affinity = data->affinity; 250 affinity = data->affinity;
303 if (!irq_has_action(irq) || 251 if (!irq_has_action(irq) ||
304 cpumask_equal(affinity, cpu_online_mask)) { 252 cpumask_subset(affinity, cpu_online_mask)) {
305 raw_spin_unlock(&desc->lock); 253 raw_spin_unlock(&desc->lock);
306 continue; 254 continue;
307 } 255 }
@@ -318,16 +266,17 @@ void fixup_irqs(void)
318 affinity = cpu_all_mask; 266 affinity = cpu_all_mask;
319 } 267 }
320 268
321 if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask) 269 chip = irq_data_get_irq_chip(data);
322 data->chip->irq_mask(data); 270 if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
271 chip->irq_mask(data);
323 272
324 if (data->chip->irq_set_affinity) 273 if (chip->irq_set_affinity)
325 data->chip->irq_set_affinity(data, affinity, true); 274 chip->irq_set_affinity(data, affinity, true);
326 else if (!(warned++)) 275 else if (!(warned++))
327 set_affinity = 0; 276 set_affinity = 0;
328 277
329 if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask) 278 if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
330 data->chip->irq_unmask(data); 279 chip->irq_unmask(data);
331 280
332 raw_spin_unlock(&desc->lock); 281 raw_spin_unlock(&desc->lock);
333 282
@@ -359,10 +308,11 @@ void fixup_irqs(void)
359 irq = __this_cpu_read(vector_irq[vector]); 308 irq = __this_cpu_read(vector_irq[vector]);
360 309
361 desc = irq_to_desc(irq); 310 desc = irq_to_desc(irq);
362 data = &desc->irq_data; 311 data = irq_desc_get_irq_data(desc);
312 chip = irq_data_get_irq_chip(data);
363 raw_spin_lock(&desc->lock); 313 raw_spin_lock(&desc->lock);
364 if (data->chip->irq_retrigger) 314 if (chip->irq_retrigger)
365 data->chip->irq_retrigger(data); 315 chip->irq_retrigger(data);
366 raw_spin_unlock(&desc->lock); 316 raw_spin_unlock(&desc->lock);
367 } 317 }
368 } 318 }
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 9f76f89f43a4..f470e4ef993e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -72,6 +72,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
72static struct irqaction fpu_irq = { 72static struct irqaction fpu_irq = {
73 .handler = math_error_irq, 73 .handler = math_error_irq,
74 .name = "fpu", 74 .name = "fpu",
75 .flags = IRQF_NO_THREAD,
75}; 76};
76#endif 77#endif
77 78
@@ -81,6 +82,7 @@ static struct irqaction fpu_irq = {
81static struct irqaction irq2 = { 82static struct irqaction irq2 = {
82 .handler = no_action, 83 .handler = no_action,
83 .name = "cascade", 84 .name = "cascade",
85 .flags = IRQF_NO_THREAD,
84}; 86};
85 87
86DEFINE_PER_CPU(vector_irq_t, vector_irq) = { 88DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
@@ -111,7 +113,7 @@ void __init init_ISA_irqs(void)
111 legacy_pic->init(0); 113 legacy_pic->init(0);
112 114
113 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) 115 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
114 set_irq_chip_and_handler_name(i, chip, handle_level_irq, name); 116 irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
115} 117}
116 118
117void __init init_IRQ(void) 119void __init init_IRQ(void)
@@ -171,14 +173,77 @@ static void __init smp_intr_init(void)
171 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 173 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
172 174
173 /* IPIs for invalidation */ 175 /* IPIs for invalidation */
174 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); 176#define ALLOC_INVTLB_VEC(NR) \
175 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); 177 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
176 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); 178 invalidate_interrupt##NR)
177 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); 179
178 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); 180 switch (NUM_INVALIDATE_TLB_VECTORS) {
179 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); 181 default:
180 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); 182 ALLOC_INVTLB_VEC(31);
181 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); 183 case 31:
184 ALLOC_INVTLB_VEC(30);
185 case 30:
186 ALLOC_INVTLB_VEC(29);
187 case 29:
188 ALLOC_INVTLB_VEC(28);
189 case 28:
190 ALLOC_INVTLB_VEC(27);
191 case 27:
192 ALLOC_INVTLB_VEC(26);
193 case 26:
194 ALLOC_INVTLB_VEC(25);
195 case 25:
196 ALLOC_INVTLB_VEC(24);
197 case 24:
198 ALLOC_INVTLB_VEC(23);
199 case 23:
200 ALLOC_INVTLB_VEC(22);
201 case 22:
202 ALLOC_INVTLB_VEC(21);
203 case 21:
204 ALLOC_INVTLB_VEC(20);
205 case 20:
206 ALLOC_INVTLB_VEC(19);
207 case 19:
208 ALLOC_INVTLB_VEC(18);
209 case 18:
210 ALLOC_INVTLB_VEC(17);
211 case 17:
212 ALLOC_INVTLB_VEC(16);
213 case 16:
214 ALLOC_INVTLB_VEC(15);
215 case 15:
216 ALLOC_INVTLB_VEC(14);
217 case 14:
218 ALLOC_INVTLB_VEC(13);
219 case 13:
220 ALLOC_INVTLB_VEC(12);
221 case 12:
222 ALLOC_INVTLB_VEC(11);
223 case 11:
224 ALLOC_INVTLB_VEC(10);
225 case 10:
226 ALLOC_INVTLB_VEC(9);
227 case 9:
228 ALLOC_INVTLB_VEC(8);
229 case 8:
230 ALLOC_INVTLB_VEC(7);
231 case 7:
232 ALLOC_INVTLB_VEC(6);
233 case 6:
234 ALLOC_INVTLB_VEC(5);
235 case 5:
236 ALLOC_INVTLB_VEC(4);
237 case 4:
238 ALLOC_INVTLB_VEC(3);
239 case 3:
240 ALLOC_INVTLB_VEC(2);
241 case 2:
242 ALLOC_INVTLB_VEC(1);
243 case 1:
244 ALLOC_INVTLB_VEC(0);
245 break;
246 }
182 247
183 /* IPI for generic function call */ 248 /* IPI for generic function call */
184 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 249 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index a4130005028a..7c64c420a9f6 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
533 } 533 }
534 return NOTIFY_DONE; 534 return NOTIFY_DONE;
535 535
536 case DIE_NMIWATCHDOG:
537 if (atomic_read(&kgdb_active) != -1) {
538 /* KGDB CPU roundup: */
539 kgdb_nmicallback(raw_smp_processor_id(), regs);
540 return NOTIFY_STOP;
541 }
542 /* Enter debugger: */
543 break;
544
545 case DIE_DEBUG: 536 case DIE_DEBUG:
546 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { 537 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
547 if (user_mode(regs)) 538 if (user_mode(regs))
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index d91c477b3f62..c969fd9d1566 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr)
1276 if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) 1276 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1277 return 0;
1278 1278
1279 /*
1280 * Do not optimize in the entry code due to the unstable
1281 * stack handling.
1282 */
1283 if ((paddr >= (unsigned long )__entry_text_start) &&
1284 (paddr < (unsigned long )__entry_text_end))
1285 return 0;
1286
1279 /* Check there is enough space for a relative jump. */ 1287 /* Check there is enough space for a relative jump. */
1280 if (size - offset < RELATIVEJUMP_SIZE) 1288 if (size - offset < RELATIVEJUMP_SIZE)
1281 return 0; 1289 return 0;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 0fe6d1a66c38..c5610384ab16 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -66,7 +66,6 @@ struct microcode_amd {
66 unsigned int mpb[0]; 66 unsigned int mpb[0];
67}; 67};
68 68
69#define UCODE_MAX_SIZE 2048
70#define UCODE_CONTAINER_SECTION_HDR 8 69#define UCODE_CONTAINER_SECTION_HDR 8
71#define UCODE_CONTAINER_HEADER_SIZE 12 70#define UCODE_CONTAINER_HEADER_SIZE 12
72 71
@@ -77,20 +76,20 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
77 struct cpuinfo_x86 *c = &cpu_data(cpu); 76 struct cpuinfo_x86 *c = &cpu_data(cpu);
78 u32 dummy; 77 u32 dummy;
79 78
80 memset(csig, 0, sizeof(*csig));
81 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { 79 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
82 pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " 80 pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
83 "supported\n", cpu, c->x86);
84 return -1; 81 return -1;
85 } 82 }
83
86 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); 84 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
87 pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); 85 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
86
88 return 0; 87 return 0;
89} 88}
90 89
91static int get_matching_microcode(int cpu, void *mc, int rev) 90static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
91 int rev)
92{ 92{
93 struct microcode_header_amd *mc_header = mc;
94 unsigned int current_cpu_id; 93 unsigned int current_cpu_id;
95 u16 equiv_cpu_id = 0; 94 u16 equiv_cpu_id = 0;
96 unsigned int i = 0; 95 unsigned int i = 0;
@@ -109,17 +108,17 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
109 if (!equiv_cpu_id) 108 if (!equiv_cpu_id)
110 return 0; 109 return 0;
111 110
112 if (mc_header->processor_rev_id != equiv_cpu_id) 111 if (mc_hdr->processor_rev_id != equiv_cpu_id)
113 return 0; 112 return 0;
114 113
115 /* ucode might be chipset specific -- currently we don't support this */ 114 /* ucode might be chipset specific -- currently we don't support this */
116 if (mc_header->nb_dev_id || mc_header->sb_dev_id) { 115 if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
117 pr_err("CPU%d: loading of chipset specific code not yet supported\n", 116 pr_err("CPU%d: chipset specific code not yet supported\n",
118 cpu); 117 cpu);
119 return 0; 118 return 0;
120 } 119 }
121 120
122 if (mc_header->patch_id <= rev) 121 if (mc_hdr->patch_id <= rev)
123 return 0; 122 return 0;
124 123
125 return 1; 124 return 1;
@@ -144,71 +143,93 @@ static int apply_microcode_amd(int cpu)
144 143
145 /* check current patch id and patch's id for match */ 144 /* check current patch id and patch's id for match */
146 if (rev != mc_amd->hdr.patch_id) { 145 if (rev != mc_amd->hdr.patch_id) {
147 pr_err("CPU%d: update failed (for patch_level=0x%x)\n", 146 pr_err("CPU%d: update failed for patch_level=0x%08x\n",
148 cpu, mc_amd->hdr.patch_id); 147 cpu, mc_amd->hdr.patch_id);
149 return -1; 148 return -1;
150 } 149 }
151 150
152 pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); 151 pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
153 uci->cpu_sig.rev = rev; 152 uci->cpu_sig.rev = rev;
154 153
155 return 0; 154 return 0;
156} 155}
157 156
158static void * 157static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
159get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
160{ 158{
161 unsigned int total_size; 159 struct cpuinfo_x86 *c = &cpu_data(cpu);
162 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 160 unsigned int max_size, actual_size;
163 void *mc; 161
162#define F1XH_MPB_MAX_SIZE 2048
163#define F14H_MPB_MAX_SIZE 1824
164#define F15H_MPB_MAX_SIZE 4096
165
166 switch (c->x86) {
167 case 0x14:
168 max_size = F14H_MPB_MAX_SIZE;
169 break;
170 case 0x15:
171 max_size = F15H_MPB_MAX_SIZE;
172 break;
173 default:
174 max_size = F1XH_MPB_MAX_SIZE;
175 break;
176 }
164 177
165 get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); 178 actual_size = buf[4] + (buf[5] << 8);
166 179
167 if (section_hdr[0] != UCODE_UCODE_TYPE) { 180 if (actual_size > size || actual_size > max_size) {
168 pr_err("error: invalid type field in container file section header\n"); 181 pr_err("section size mismatch\n");
169 return NULL; 182 return 0;
170 } 183 }
171 184
172 total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); 185 return actual_size;
186}
173 187
174 if (total_size > size || total_size > UCODE_MAX_SIZE) { 188static struct microcode_header_amd *
175 pr_err("error: size mismatch\n"); 189get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
176 return NULL; 190{
191 struct microcode_header_amd *mc = NULL;
192 unsigned int actual_size = 0;
193
194 if (buf[0] != UCODE_UCODE_TYPE) {
195 pr_err("invalid type field in container file section header\n");
196 goto out;
177 } 197 }
178 198
179 mc = vzalloc(UCODE_MAX_SIZE); 199 actual_size = verify_ucode_size(cpu, buf, size);
200 if (!actual_size)
201 goto out;
202
203 mc = vzalloc(actual_size);
180 if (!mc) 204 if (!mc)
181 return NULL; 205 goto out;
182 206
183 get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); 207 get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size);
184 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; 208 *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR;
185 209
210out:
186 return mc; 211 return mc;
187} 212}
188 213
189static int install_equiv_cpu_table(const u8 *buf) 214static int install_equiv_cpu_table(const u8 *buf)
190{ 215{
191 u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; 216 unsigned int *ibuf = (unsigned int *)buf;
192 unsigned int *buf_pos = (unsigned int *)container_hdr; 217 unsigned int type = ibuf[1];
193 unsigned long size; 218 unsigned int size = ibuf[2];
194 219
195 get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); 220 if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
196 221 pr_err("empty section/"
197 size = buf_pos[2]; 222 "invalid type field in container file section header\n");
198 223 return -EINVAL;
199 if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
200 pr_err("error: invalid type field in container file section header\n");
201 return 0;
202 } 224 }
203 225
204 equiv_cpu_table = vmalloc(size); 226 equiv_cpu_table = vmalloc(size);
205 if (!equiv_cpu_table) { 227 if (!equiv_cpu_table) {
206 pr_err("failed to allocate equivalent CPU table\n"); 228 pr_err("failed to allocate equivalent CPU table\n");
207 return 0; 229 return -ENOMEM;
208 } 230 }
209 231
210 buf += UCODE_CONTAINER_HEADER_SIZE; 232 get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size);
211 get_ucode_data(equiv_cpu_table, buf, size);
212 233
213 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ 234 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
214} 235}
@@ -223,16 +244,16 @@ static enum ucode_state
223generic_load_microcode(int cpu, const u8 *data, size_t size) 244generic_load_microcode(int cpu, const u8 *data, size_t size)
224{ 245{
225 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 246 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
247 struct microcode_header_amd *mc_hdr = NULL;
248 unsigned int mc_size, leftover;
249 int offset;
226 const u8 *ucode_ptr = data; 250 const u8 *ucode_ptr = data;
227 void *new_mc = NULL; 251 void *new_mc = NULL;
228 void *mc; 252 unsigned int new_rev = uci->cpu_sig.rev;
229 int new_rev = uci->cpu_sig.rev;
230 unsigned int leftover;
231 unsigned long offset;
232 enum ucode_state state = UCODE_OK; 253 enum ucode_state state = UCODE_OK;
233 254
234 offset = install_equiv_cpu_table(ucode_ptr); 255 offset = install_equiv_cpu_table(ucode_ptr);
235 if (!offset) { 256 if (offset < 0) {
236 pr_err("failed to create equivalent cpu table\n"); 257 pr_err("failed to create equivalent cpu table\n");
237 return UCODE_ERROR; 258 return UCODE_ERROR;
238 } 259 }
@@ -241,64 +262,65 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
241 leftover = size - offset; 262 leftover = size - offset;
242 263
243 while (leftover) { 264 while (leftover) {
244 unsigned int uninitialized_var(mc_size); 265 mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
245 struct microcode_header_amd *mc_header; 266 if (!mc_hdr)
246
247 mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
248 if (!mc)
249 break; 267 break;
250 268
251 mc_header = (struct microcode_header_amd *)mc; 269 if (get_matching_microcode(cpu, mc_hdr, new_rev)) {
252 if (get_matching_microcode(cpu, mc, new_rev)) {
253 vfree(new_mc); 270 vfree(new_mc);
254 new_rev = mc_header->patch_id; 271 new_rev = mc_hdr->patch_id;
255 new_mc = mc; 272 new_mc = mc_hdr;
256 } else 273 } else
257 vfree(mc); 274 vfree(mc_hdr);
258 275
259 ucode_ptr += mc_size; 276 ucode_ptr += mc_size;
260 leftover -= mc_size; 277 leftover -= mc_size;
261 } 278 }
262 279
263 if (new_mc) { 280 if (!new_mc) {
264 if (!leftover) {
265 vfree(uci->mc);
266 uci->mc = new_mc;
267 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
268 cpu, new_rev, uci->cpu_sig.rev);
269 } else {
270 vfree(new_mc);
271 state = UCODE_ERROR;
272 }
273 } else
274 state = UCODE_NFOUND; 281 state = UCODE_NFOUND;
282 goto free_table;
283 }
275 284
285 if (!leftover) {
286 vfree(uci->mc);
287 uci->mc = new_mc;
288 pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
289 cpu, uci->cpu_sig.rev, new_rev);
290 } else {
291 vfree(new_mc);
292 state = UCODE_ERROR;
293 }
294
295free_table:
276 free_equiv_cpu_table(); 296 free_equiv_cpu_table();
277 297
278 return state; 298 return state;
279} 299}
280 300
281static enum ucode_state request_microcode_fw(int cpu, struct device *device) 301static enum ucode_state request_microcode_amd(int cpu, struct device *device)
282{ 302{
283 const char *fw_name = "amd-ucode/microcode_amd.bin"; 303 const char *fw_name = "amd-ucode/microcode_amd.bin";
284 const struct firmware *firmware; 304 const struct firmware *fw;
285 enum ucode_state ret; 305 enum ucode_state ret = UCODE_NFOUND;
286 306
287 if (request_firmware(&firmware, fw_name, device)) { 307 if (request_firmware(&fw, fw_name, device)) {
288 printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); 308 pr_err("failed to load file %s\n", fw_name);
289 return UCODE_NFOUND; 309 goto out;
290 } 310 }
291 311
292 if (*(u32 *)firmware->data != UCODE_MAGIC) { 312 ret = UCODE_ERROR;
293 pr_err("invalid UCODE_MAGIC (0x%08x)\n", 313 if (*(u32 *)fw->data != UCODE_MAGIC) {
294 *(u32 *)firmware->data); 314 pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
295 return UCODE_ERROR; 315 goto fw_release;
296 } 316 }
297 317
298 ret = generic_load_microcode(cpu, firmware->data, firmware->size); 318 ret = generic_load_microcode(cpu, fw->data, fw->size);
299 319
300 release_firmware(firmware); 320fw_release:
321 release_firmware(fw);
301 322
323out:
302 return ret; 324 return ret;
303} 325}
304 326
@@ -319,7 +341,7 @@ static void microcode_fini_cpu_amd(int cpu)
319 341
320static struct microcode_ops microcode_amd_ops = { 342static struct microcode_ops microcode_amd_ops = {
321 .request_microcode_user = request_microcode_user, 343 .request_microcode_user = request_microcode_user,
322 .request_microcode_fw = request_microcode_fw, 344 .request_microcode_fw = request_microcode_amd,
323 .collect_cpu_info = collect_cpu_info_amd, 345 .collect_cpu_info = collect_cpu_info_amd,
324 .apply_microcode = apply_microcode_amd, 346 .apply_microcode = apply_microcode_amd,
325 .microcode_fini_cpu = microcode_fini_cpu_amd, 347 .microcode_fini_cpu = microcode_fini_cpu_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1cca374a2bac..87af68e0e1e1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -417,8 +417,10 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
417 if (err) 417 if (err)
418 return err; 418 return err;
419 419
420 if (microcode_init_cpu(cpu) == UCODE_ERROR) 420 if (microcode_init_cpu(cpu) == UCODE_ERROR) {
421 err = -EINVAL; 421 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
422 return -EINVAL;
423 }
422 424
423 return err; 425 return err;
424} 426}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff4554198981..99fa3adf0141 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,12 +110,9 @@ void show_regs_common(void)
110 init_utsname()->release, 110 init_utsname()->release,
111 (int)strcspn(init_utsname()->version, " "), 111 (int)strcspn(init_utsname()->version, " "),
112 init_utsname()->version); 112 init_utsname()->version);
113 printk(KERN_CONT " "); 113 printk(KERN_CONT " %s %s", vendor, product);
114 printk(KERN_CONT "%s %s", vendor, product); 114 if (board)
115 if (board) { 115 printk(KERN_CONT "/%s", board);
116 printk(KERN_CONT "/");
117 printk(KERN_CONT "%s", board);
118 }
119 printk(KERN_CONT "\n"); 116 printk(KERN_CONT "\n");
120} 117}
121 118
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index fc7aae1e2bc7..715037caeb43 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -285,6 +285,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
285 DMI_MATCH(DMI_BOARD_NAME, "P4S800"), 285 DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
286 }, 286 },
287 }, 287 },
288 { /* Handle problems with rebooting on VersaLogic Menlow boards */
289 .callback = set_bios_reboot,
290 .ident = "VersaLogic Menlow based board",
291 .matches = {
292 DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
293 DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
294 },
295 },
288 { } 296 { }
289}; 297};
290 298
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b3143bc74e6c..b176f2b1f45d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -294,10 +294,32 @@ static void __init init_gbpages(void)
294 else 294 else
295 direct_gbpages = 0; 295 direct_gbpages = 0;
296} 296}
297
298static void __init cleanup_highmap_brk_end(void)
299{
300 pud_t *pud;
301 pmd_t *pmd;
302
303 mmu_cr4_features = read_cr4();
304
305 /*
306 * _brk_end cannot change anymore, but it and _end may be
307 * located on different 2M pages. cleanup_highmap(), however,
308 * can only consider _end when it runs, so destroy any
309 * mappings beyond _brk_end here.
310 */
311 pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
312 pmd = pmd_offset(pud, _brk_end - 1);
313 while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
314 pmd_clear(pmd);
315}
297#else 316#else
298static inline void init_gbpages(void) 317static inline void init_gbpages(void)
299{ 318{
300} 319}
320static inline void cleanup_highmap_brk_end(void)
321{
322}
301#endif 323#endif
302 324
303static void __init reserve_brk(void) 325static void __init reserve_brk(void)
@@ -308,6 +330,8 @@ static void __init reserve_brk(void)
308 /* Mark brk area as locked down and no longer taking any 330 /* Mark brk area as locked down and no longer taking any
309 new allocations */ 331 new allocations */
310 _brk_start = 0; 332 _brk_start = 0;
333
334 cleanup_highmap_brk_end();
311} 335}
312 336
313#ifdef CONFIG_BLK_DEV_INITRD 337#ifdef CONFIG_BLK_DEV_INITRD
@@ -695,15 +719,6 @@ static int __init parse_reservelow(char *p)
695 719
696early_param("reservelow", parse_reservelow); 720early_param("reservelow", parse_reservelow);
697 721
698static u64 __init get_max_mapped(void)
699{
700 u64 end = max_pfn_mapped;
701
702 end <<= PAGE_SHIFT;
703
704 return end;
705}
706
707/* 722/*
708 * Determine if we were loaded by an EFI loader. If so, then we have also been 723 * Determine if we were loaded by an EFI loader. If so, then we have also been
709 * passed the efi memmap, systab, etc., so we should use these data structures 724 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -719,8 +734,6 @@ static u64 __init get_max_mapped(void)
719 734
720void __init setup_arch(char **cmdline_p) 735void __init setup_arch(char **cmdline_p)
721{ 736{
722 int acpi = 0;
723 int amd = 0;
724 unsigned long flags; 737 unsigned long flags;
725 738
726#ifdef CONFIG_X86_32 739#ifdef CONFIG_X86_32
@@ -999,19 +1012,7 @@ void __init setup_arch(char **cmdline_p)
999 1012
1000 early_acpi_boot_init(); 1013 early_acpi_boot_init();
1001 1014
1002#ifdef CONFIG_ACPI_NUMA 1015 initmem_init();
1003 /*
1004 * Parse SRAT to discover nodes.
1005 */
1006 acpi = acpi_numa_init();
1007#endif
1008
1009#ifdef CONFIG_AMD_NUMA
1010 if (!acpi)
1011 amd = !amd_numa_init(0, max_pfn);
1012#endif
1013
1014 initmem_init(0, max_pfn, acpi, amd);
1015 memblock_find_dma_reserve(); 1016 memblock_find_dma_reserve();
1016 dma32_reserve_bootmem(); 1017 dma32_reserve_bootmem();
1017 1018
@@ -1055,9 +1056,7 @@ void __init setup_arch(char **cmdline_p)
1055 1056
1056 prefill_possible_map(); 1057 prefill_possible_map();
1057 1058
1058#ifdef CONFIG_X86_64
1059 init_cpu_to_node(); 1059 init_cpu_to_node();
1060#endif
1061 1060
1062 init_apic_mappings(); 1061 init_apic_mappings();
1063 ioapic_and_gsi_init(); 1062 ioapic_and_gsi_init();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 002b79685f73..71f4727da373 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -225,10 +225,15 @@ void __init setup_per_cpu_areas(void)
225 per_cpu(x86_bios_cpu_apicid, cpu) = 225 per_cpu(x86_bios_cpu_apicid, cpu) =
226 early_per_cpu_map(x86_bios_cpu_apicid, cpu); 226 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
227#endif 227#endif
228#ifdef CONFIG_X86_32
229 per_cpu(x86_cpu_to_logical_apicid, cpu) =
230 early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
231#endif
228#ifdef CONFIG_X86_64 232#ifdef CONFIG_X86_64
229 per_cpu(irq_stack_ptr, cpu) = 233 per_cpu(irq_stack_ptr, cpu) =
230 per_cpu(irq_stack_union.irq_stack, cpu) + 234 per_cpu(irq_stack_union.irq_stack, cpu) +
231 IRQ_STACK_SIZE - 64; 235 IRQ_STACK_SIZE - 64;
236#endif
232#ifdef CONFIG_NUMA 237#ifdef CONFIG_NUMA
233 per_cpu(x86_cpu_to_node_map, cpu) = 238 per_cpu(x86_cpu_to_node_map, cpu) =
234 early_per_cpu_map(x86_cpu_to_node_map, cpu); 239 early_per_cpu_map(x86_cpu_to_node_map, cpu);
@@ -242,7 +247,6 @@ void __init setup_per_cpu_areas(void)
242 */ 247 */
243 set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); 248 set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
244#endif 249#endif
245#endif
246 /* 250 /*
247 * Up to this point, the boot CPU has been using .init.data 251 * Up to this point, the boot CPU has been using .init.data
248 * area. Reload any changed state for the boot CPU. 252 * area. Reload any changed state for the boot CPU.
@@ -256,7 +260,10 @@ void __init setup_per_cpu_areas(void)
256 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 260 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
257 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; 261 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
258#endif 262#endif
259#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) 263#ifdef CONFIG_X86_32
264 early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
265#endif
266#ifdef CONFIG_NUMA
260 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; 267 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
261#endif 268#endif
262 269
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 09d0172a0059..e9efdfd51c8d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -72,10 +72,6 @@
72#include <asm/smpboot_hooks.h> 72#include <asm/smpboot_hooks.h>
73#include <asm/i8259.h> 73#include <asm/i8259.h>
74 74
75#ifdef CONFIG_X86_32
76u8 apicid_2_node[MAX_APICID];
77#endif
78
79/* State of each CPU */ 75/* State of each CPU */
80DEFINE_PER_CPU(int, cpu_state) = { 0 }; 76DEFINE_PER_CPU(int, cpu_state) = { 0 };
81 77
@@ -131,68 +127,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
131DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); 127DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
132EXPORT_PER_CPU_SYMBOL(cpu_core_map); 128EXPORT_PER_CPU_SYMBOL(cpu_core_map);
133 129
130DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
131
134/* Per CPU bogomips and other parameters */ 132/* Per CPU bogomips and other parameters */
135DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); 133DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
136EXPORT_PER_CPU_SYMBOL(cpu_info); 134EXPORT_PER_CPU_SYMBOL(cpu_info);
137 135
138atomic_t init_deasserted; 136atomic_t init_deasserted;
139 137
140#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
141/* which node each logical CPU is on */
142int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
143EXPORT_SYMBOL(cpu_to_node_map);
144
145/* set up a mapping between cpu and node. */
146static void map_cpu_to_node(int cpu, int node)
147{
148 printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
149 cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
150 cpu_to_node_map[cpu] = node;
151}
152
153/* undo a mapping between cpu and node. */
154static void unmap_cpu_to_node(int cpu)
155{
156 int node;
157
158 printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
159 for (node = 0; node < MAX_NUMNODES; node++)
160 cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
161 cpu_to_node_map[cpu] = 0;
162}
163#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
164#define map_cpu_to_node(cpu, node) ({})
165#define unmap_cpu_to_node(cpu) ({})
166#endif
167
168#ifdef CONFIG_X86_32
169static int boot_cpu_logical_apicid;
170
171u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
172 { [0 ... NR_CPUS-1] = BAD_APICID };
173
174static void map_cpu_to_logical_apicid(void)
175{
176 int cpu = smp_processor_id();
177 int apicid = logical_smp_processor_id();
178 int node = apic->apicid_to_node(apicid);
179
180 if (!node_online(node))
181 node = first_online_node;
182
183 cpu_2_logical_apicid[cpu] = apicid;
184 map_cpu_to_node(cpu, node);
185}
186
187void numa_remove_cpu(int cpu)
188{
189 cpu_2_logical_apicid[cpu] = BAD_APICID;
190 unmap_cpu_to_node(cpu);
191}
192#else
193#define map_cpu_to_logical_apicid() do {} while (0)
194#endif
195
196/* 138/*
197 * Report back to the Boot Processor. 139 * Report back to the Boot Processor.
198 * Running on AP. 140 * Running on AP.
@@ -260,7 +202,6 @@ static void __cpuinit smp_callin(void)
260 apic->smp_callin_clear_local_apic(); 202 apic->smp_callin_clear_local_apic();
261 setup_local_APIC(); 203 setup_local_APIC();
262 end_local_APIC_setup(); 204 end_local_APIC_setup();
263 map_cpu_to_logical_apicid();
264 205
265 /* 206 /*
266 * Need to setup vector mappings before we enable interrupts. 207 * Need to setup vector mappings before we enable interrupts.
@@ -356,23 +297,6 @@ notrace static void __cpuinit start_secondary(void *unused)
356 cpu_idle(); 297 cpu_idle();
357} 298}
358 299
359#ifdef CONFIG_CPUMASK_OFFSTACK
360/* In this case, llc_shared_map is a pointer to a cpumask. */
361static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
362 const struct cpuinfo_x86 *src)
363{
364 struct cpumask *llc = dst->llc_shared_map;
365 *dst = *src;
366 dst->llc_shared_map = llc;
367}
368#else
369static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
370 const struct cpuinfo_x86 *src)
371{
372 *dst = *src;
373}
374#endif /* CONFIG_CPUMASK_OFFSTACK */
375
376/* 300/*
377 * The bootstrap kernel entry code has set these up. Save them for 301 * The bootstrap kernel entry code has set these up. Save them for
378 * a given CPU 302 * a given CPU
@@ -382,7 +306,7 @@ void __cpuinit smp_store_cpu_info(int id)
382{ 306{
383 struct cpuinfo_x86 *c = &cpu_data(id); 307 struct cpuinfo_x86 *c = &cpu_data(id);
384 308
385 copy_cpuinfo_x86(c, &boot_cpu_data); 309 *c = boot_cpu_data;
386 c->cpu_index = id; 310 c->cpu_index = id;
387 if (id != 0) 311 if (id != 0)
388 identify_secondary_cpu(c); 312 identify_secondary_cpu(c);
@@ -390,15 +314,12 @@ void __cpuinit smp_store_cpu_info(int id)
390 314
391static void __cpuinit link_thread_siblings(int cpu1, int cpu2) 315static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
392{ 316{
393 struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
394 struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
395
396 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); 317 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
397 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); 318 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
398 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); 319 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
399 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); 320 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
400 cpumask_set_cpu(cpu1, c2->llc_shared_map); 321 cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
401 cpumask_set_cpu(cpu2, c1->llc_shared_map); 322 cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
402} 323}
403 324
404 325
@@ -415,6 +336,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
415 336
416 if (cpu_has(c, X86_FEATURE_TOPOEXT)) { 337 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
417 if (c->phys_proc_id == o->phys_proc_id && 338 if (c->phys_proc_id == o->phys_proc_id &&
339 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
418 c->compute_unit_id == o->compute_unit_id) 340 c->compute_unit_id == o->compute_unit_id)
419 link_thread_siblings(cpu, i); 341 link_thread_siblings(cpu, i);
420 } else if (c->phys_proc_id == o->phys_proc_id && 342 } else if (c->phys_proc_id == o->phys_proc_id &&
@@ -426,7 +348,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
426 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); 348 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
427 } 349 }
428 350
429 cpumask_set_cpu(cpu, c->llc_shared_map); 351 cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
430 352
431 if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { 353 if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
432 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); 354 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -437,8 +359,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
437 for_each_cpu(i, cpu_sibling_setup_mask) { 359 for_each_cpu(i, cpu_sibling_setup_mask) {
438 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && 360 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
439 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { 361 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
440 cpumask_set_cpu(i, c->llc_shared_map); 362 cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
441 cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); 363 cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
442 } 364 }
443 if (c->phys_proc_id == cpu_data(i).phys_proc_id) { 365 if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
444 cpumask_set_cpu(i, cpu_core_mask(cpu)); 366 cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -477,7 +399,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
477 !(cpu_has(c, X86_FEATURE_AMD_DCM))) 399 !(cpu_has(c, X86_FEATURE_AMD_DCM)))
478 return cpu_core_mask(cpu); 400 return cpu_core_mask(cpu);
479 else 401 else
480 return c->llc_shared_map; 402 return cpu_llc_shared_mask(cpu);
481} 403}
482 404
483static void impress_friends(void) 405static void impress_friends(void)
@@ -969,7 +891,6 @@ static __init void disable_smp(void)
969 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 891 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
970 else 892 else
971 physid_set_mask_of_physid(0, &phys_cpu_present_map); 893 physid_set_mask_of_physid(0, &phys_cpu_present_map);
972 map_cpu_to_logical_apicid();
973 cpumask_set_cpu(0, cpu_sibling_mask(0)); 894 cpumask_set_cpu(0, cpu_sibling_mask(0));
974 cpumask_set_cpu(0, cpu_core_mask(0)); 895 cpumask_set_cpu(0, cpu_core_mask(0));
975} 896}
@@ -1098,21 +1019,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1098 1019
1099 preempt_disable(); 1020 preempt_disable();
1100 smp_cpu_index_default(); 1021 smp_cpu_index_default();
1101 memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info)); 1022
1102 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1103 mb();
1104 /* 1023 /*
1105 * Setup boot CPU information 1024 * Setup boot CPU information
1106 */ 1025 */
1107 smp_store_cpu_info(0); /* Final full version of the data */ 1026 smp_store_cpu_info(0); /* Final full version of the data */
1108#ifdef CONFIG_X86_32 1027 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1109 boot_cpu_logical_apicid = logical_smp_processor_id(); 1028 mb();
1110#endif 1029
1111 current_thread_info()->cpu = 0; /* needed? */ 1030 current_thread_info()->cpu = 0; /* needed? */
1112 for_each_possible_cpu(i) { 1031 for_each_possible_cpu(i) {
1113 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); 1032 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
1114 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); 1033 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
1115 zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); 1034 zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
1116 } 1035 }
1117 set_cpu_sibling_map(0); 1036 set_cpu_sibling_map(0);
1118 1037
@@ -1148,8 +1067,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1148 1067
1149 bsp_end_local_APIC_setup(); 1068 bsp_end_local_APIC_setup();
1150 1069
1151 map_cpu_to_logical_apicid();
1152
1153 if (apic->setup_portio_remap) 1070 if (apic->setup_portio_remap)
1154 apic->setup_portio_remap(); 1071 apic->setup_portio_remap();
1155 1072
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8f..5f181742e8f9 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,6 @@ ENTRY(sys_call_table)
340 .long sys_fanotify_init 340 .long sys_fanotify_init
341 .long sys_fanotify_mark 341 .long sys_fanotify_mark
342 .long sys_prlimit64 /* 340 */ 342 .long sys_prlimit64 /* 340 */
343 .long sys_name_to_handle_at
344 .long sys_open_by_handle_at
345 .long sys_clock_adjtime
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bf4700755184..0381e1f3baed 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -105,6 +105,7 @@ SECTIONS
105 SCHED_TEXT 105 SCHED_TEXT
106 LOCK_TEXT 106 LOCK_TEXT
107 KPROBES_TEXT 107 KPROBES_TEXT
108 ENTRY_TEXT
108 IRQENTRY_TEXT 109 IRQENTRY_TEXT
109 *(.fixup) 110 *(.fixup)
110 *(.gnu.warning) 111 *(.gnu.warning)
@@ -305,7 +306,7 @@ SECTIONS
305 } 306 }
306 307
307#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) 308#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
308 PERCPU(THREAD_SIZE) 309 PERCPU(PAGE_SIZE)
309#endif 310#endif
310 311
311 . = ALIGN(PAGE_SIZE); 312 . = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1b950d151e58..9796c2f3d074 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
52EXPORT_SYMBOL(memset); 52EXPORT_SYMBOL(memset);
53EXPORT_SYMBOL(memcpy); 53EXPORT_SYMBOL(memcpy);
54EXPORT_SYMBOL(__memcpy); 54EXPORT_SYMBOL(__memcpy);
55EXPORT_SYMBOL(memmove);
55 56
56EXPORT_SYMBOL(empty_zero_page); 57EXPORT_SYMBOL(empty_zero_page);
57#ifndef CONFIG_PARAVIRT 58#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 54ce246a383e..63fec1531e89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2777,6 +2777,8 @@ static int dr_interception(struct vcpu_svm *svm)
2777 kvm_register_write(&svm->vcpu, reg, val); 2777 kvm_register_write(&svm->vcpu, reg, val);
2778 } 2778 }
2779 2779
2780 skip_emulated_instruction(&svm->vcpu);
2781
2780 return 1; 2782 return 1;
2781} 2783}
2782 2784
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 1357d7cf4ec8..db932760ea82 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -62,21 +62,21 @@ TRACE_EVENT(kvm_hv_hypercall,
62 TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), 62 TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
63 63
64 TP_STRUCT__entry( 64 TP_STRUCT__entry(
65 __field( __u16, code )
66 __field( bool, fast )
67 __field( __u16, rep_cnt ) 65 __field( __u16, rep_cnt )
68 __field( __u16, rep_idx ) 66 __field( __u16, rep_idx )
69 __field( __u64, ingpa ) 67 __field( __u64, ingpa )
70 __field( __u64, outgpa ) 68 __field( __u64, outgpa )
69 __field( __u16, code )
70 __field( bool, fast )
71 ), 71 ),
72 72
73 TP_fast_assign( 73 TP_fast_assign(
74 __entry->code = code;
75 __entry->fast = fast;
76 __entry->rep_cnt = rep_cnt; 74 __entry->rep_cnt = rep_cnt;
77 __entry->rep_idx = rep_idx; 75 __entry->rep_idx = rep_idx;
78 __entry->ingpa = ingpa; 76 __entry->ingpa = ingpa;
79 __entry->outgpa = outgpa; 77 __entry->outgpa = outgpa;
78 __entry->code = code;
79 __entry->fast = fast;
80 ), 80 ),
81 81
82 TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", 82 TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0c..b9ec1c74943c 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -847,7 +847,7 @@ static void __init lguest_init_IRQ(void)
847void lguest_setup_irq(unsigned int irq) 847void lguest_setup_irq(unsigned int irq)
848{ 848{
849 irq_alloc_desc_at(irq, 0); 849 irq_alloc_desc_at(irq, 0);
850 set_irq_chip_and_handler_name(irq, &lguest_irq_controller, 850 irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
851 handle_level_irq, "level"); 851 handle_level_irq, "level");
852} 852}
853 853
@@ -995,7 +995,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
995static void lguest_time_init(void) 995static void lguest_time_init(void)
996{ 996{
997 /* Set up the timer interrupt (0) to go to our simple timer routine */ 997 /* Set up the timer interrupt (0) to go to our simple timer routine */
998 set_irq_handler(0, lguest_time_irq); 998 irq_set_handler(0, lguest_time_irq);
999 999
1000 clocksource_register(&lguest_clock); 1000 clocksource_register(&lguest_clock);
1001 1001
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 2cda60a06e65..e8e7e0d06f42 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -15,14 +15,12 @@
15 15
16/* if you want SMP support, implement these with real spinlocks */ 16/* if you want SMP support, implement these with real spinlocks */
17.macro LOCK reg 17.macro LOCK reg
18 pushfl 18 pushfl_cfi
19 CFI_ADJUST_CFA_OFFSET 4
20 cli 19 cli
21.endm 20.endm
22 21
23.macro UNLOCK reg 22.macro UNLOCK reg
24 popfl 23 popfl_cfi
25 CFI_ADJUST_CFA_OFFSET -4
26.endm 24.endm
27 25
28#define BEGIN(op) \ 26#define BEGIN(op) \
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 71e080de3352..391a083674b4 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -14,14 +14,12 @@
14#include <asm/dwarf2.h> 14#include <asm/dwarf2.h>
15 15
16.macro SAVE reg 16.macro SAVE reg
17 pushl %\reg 17 pushl_cfi %\reg
18 CFI_ADJUST_CFA_OFFSET 4
19 CFI_REL_OFFSET \reg, 0 18 CFI_REL_OFFSET \reg, 0
20.endm 19.endm
21 20
22.macro RESTORE reg 21.macro RESTORE reg
23 popl %\reg 22 popl_cfi %\reg
24 CFI_ADJUST_CFA_OFFSET -4
25 CFI_RESTORE \reg 23 CFI_RESTORE \reg
26.endm 24.endm
27 25
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index adbccd0bbb78..78d16a554db0 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
50 */ 50 */
51ENTRY(csum_partial) 51ENTRY(csum_partial)
52 CFI_STARTPROC 52 CFI_STARTPROC
53 pushl %esi 53 pushl_cfi %esi
54 CFI_ADJUST_CFA_OFFSET 4
55 CFI_REL_OFFSET esi, 0 54 CFI_REL_OFFSET esi, 0
56 pushl %ebx 55 pushl_cfi %ebx
57 CFI_ADJUST_CFA_OFFSET 4
58 CFI_REL_OFFSET ebx, 0 56 CFI_REL_OFFSET ebx, 0
59 movl 20(%esp),%eax # Function arg: unsigned int sum 57 movl 20(%esp),%eax # Function arg: unsigned int sum
60 movl 16(%esp),%ecx # Function arg: int len 58 movl 16(%esp),%ecx # Function arg: int len
@@ -132,11 +130,9 @@ ENTRY(csum_partial)
132 jz 8f 130 jz 8f
133 roll $8, %eax 131 roll $8, %eax
1348: 1328:
135 popl %ebx 133 popl_cfi %ebx
136 CFI_ADJUST_CFA_OFFSET -4
137 CFI_RESTORE ebx 134 CFI_RESTORE ebx
138 popl %esi 135 popl_cfi %esi
139 CFI_ADJUST_CFA_OFFSET -4
140 CFI_RESTORE esi 136 CFI_RESTORE esi
141 ret 137 ret
142 CFI_ENDPROC 138 CFI_ENDPROC
@@ -148,11 +144,9 @@ ENDPROC(csum_partial)
148 144
149ENTRY(csum_partial) 145ENTRY(csum_partial)
150 CFI_STARTPROC 146 CFI_STARTPROC
151 pushl %esi 147 pushl_cfi %esi
152 CFI_ADJUST_CFA_OFFSET 4
153 CFI_REL_OFFSET esi, 0 148 CFI_REL_OFFSET esi, 0
154 pushl %ebx 149 pushl_cfi %ebx
155 CFI_ADJUST_CFA_OFFSET 4
156 CFI_REL_OFFSET ebx, 0 150 CFI_REL_OFFSET ebx, 0
157 movl 20(%esp),%eax # Function arg: unsigned int sum 151 movl 20(%esp),%eax # Function arg: unsigned int sum
158 movl 16(%esp),%ecx # Function arg: int len 152 movl 16(%esp),%ecx # Function arg: int len
@@ -260,11 +254,9 @@ ENTRY(csum_partial)
260 jz 90f 254 jz 90f
261 roll $8, %eax 255 roll $8, %eax
26290: 25690:
263 popl %ebx 257 popl_cfi %ebx
264 CFI_ADJUST_CFA_OFFSET -4
265 CFI_RESTORE ebx 258 CFI_RESTORE ebx
266 popl %esi 259 popl_cfi %esi
267 CFI_ADJUST_CFA_OFFSET -4
268 CFI_RESTORE esi 260 CFI_RESTORE esi
269 ret 261 ret
270 CFI_ENDPROC 262 CFI_ENDPROC
@@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic)
309 CFI_STARTPROC 301 CFI_STARTPROC
310 subl $4,%esp 302 subl $4,%esp
311 CFI_ADJUST_CFA_OFFSET 4 303 CFI_ADJUST_CFA_OFFSET 4
312 pushl %edi 304 pushl_cfi %edi
313 CFI_ADJUST_CFA_OFFSET 4
314 CFI_REL_OFFSET edi, 0 305 CFI_REL_OFFSET edi, 0
315 pushl %esi 306 pushl_cfi %esi
316 CFI_ADJUST_CFA_OFFSET 4
317 CFI_REL_OFFSET esi, 0 307 CFI_REL_OFFSET esi, 0
318 pushl %ebx 308 pushl_cfi %ebx
319 CFI_ADJUST_CFA_OFFSET 4
320 CFI_REL_OFFSET ebx, 0 309 CFI_REL_OFFSET ebx, 0
321 movl ARGBASE+16(%esp),%eax # sum 310 movl ARGBASE+16(%esp),%eax # sum
322 movl ARGBASE+12(%esp),%ecx # len 311 movl ARGBASE+12(%esp),%ecx # len
@@ -426,17 +415,13 @@ DST( movb %cl, (%edi) )
426 415
427.previous 416.previous
428 417
429 popl %ebx 418 popl_cfi %ebx
430 CFI_ADJUST_CFA_OFFSET -4
431 CFI_RESTORE ebx 419 CFI_RESTORE ebx
432 popl %esi 420 popl_cfi %esi
433 CFI_ADJUST_CFA_OFFSET -4
434 CFI_RESTORE esi 421 CFI_RESTORE esi
435 popl %edi 422 popl_cfi %edi
436 CFI_ADJUST_CFA_OFFSET -4
437 CFI_RESTORE edi 423 CFI_RESTORE edi
438 popl %ecx # equivalent to addl $4,%esp 424 popl_cfi %ecx # equivalent to addl $4,%esp
439 CFI_ADJUST_CFA_OFFSET -4
440 ret 425 ret
441 CFI_ENDPROC 426 CFI_ENDPROC
442ENDPROC(csum_partial_copy_generic) 427ENDPROC(csum_partial_copy_generic)
@@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic)
459 444
460ENTRY(csum_partial_copy_generic) 445ENTRY(csum_partial_copy_generic)
461 CFI_STARTPROC 446 CFI_STARTPROC
462 pushl %ebx 447 pushl_cfi %ebx
463 CFI_ADJUST_CFA_OFFSET 4
464 CFI_REL_OFFSET ebx, 0 448 CFI_REL_OFFSET ebx, 0
465 pushl %edi 449 pushl_cfi %edi
466 CFI_ADJUST_CFA_OFFSET 4
467 CFI_REL_OFFSET edi, 0 450 CFI_REL_OFFSET edi, 0
468 pushl %esi 451 pushl_cfi %esi
469 CFI_ADJUST_CFA_OFFSET 4
470 CFI_REL_OFFSET esi, 0 452 CFI_REL_OFFSET esi, 0
471 movl ARGBASE+4(%esp),%esi #src 453 movl ARGBASE+4(%esp),%esi #src
472 movl ARGBASE+8(%esp),%edi #dst 454 movl ARGBASE+8(%esp),%edi #dst
@@ -527,14 +509,11 @@ DST( movb %dl, (%edi) )
527 jmp 7b 509 jmp 7b
528.previous 510.previous
529 511
530 popl %esi 512 popl_cfi %esi
531 CFI_ADJUST_CFA_OFFSET -4
532 CFI_RESTORE esi 513 CFI_RESTORE esi
533 popl %edi 514 popl_cfi %edi
534 CFI_ADJUST_CFA_OFFSET -4
535 CFI_RESTORE edi 515 CFI_RESTORE edi
536 popl %ebx 516 popl_cfi %ebx
537 CFI_ADJUST_CFA_OFFSET -4
538 CFI_RESTORE ebx 517 CFI_RESTORE ebx
539 ret 518 ret
540 CFI_ENDPROC 519 CFI_ENDPROC
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 000000000000..0ecb8433e5a8
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,197 @@
1/*
2 * Normally compiler builtins are used, but sometimes the compiler calls out
3 * of line code. Based on asm-i386/string.h.
4 *
5 * This assembly file is re-written from memmove_64.c file.
6 * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
7 */
8#define _STRING_C
9#include <linux/linkage.h>
10#include <asm/dwarf2.h>
11
12#undef memmove
13
14/*
15 * Implement memmove(). This can handle overlap between src and dst.
16 *
17 * Input:
18 * rdi: dest
19 * rsi: src
20 * rdx: count
21 *
22 * Output:
23 * rax: dest
24 */
25ENTRY(memmove)
26 CFI_STARTPROC
27 /* Handle more 32bytes in loop */
28 mov %rdi, %rax
29 cmp $0x20, %rdx
30 jb 1f
31
32 /* Decide forward/backward copy mode */
33 cmp %rdi, %rsi
34 jb 2f
35
36 /*
37 * movsq instruction have many startup latency
38 * so we handle small size by general register.
39 */
40 cmp $680, %rdx
41 jb 3f
42 /*
43 * movsq instruction is only good for aligned case.
44 */
45
46 cmpb %dil, %sil
47 je 4f
483:
49 sub $0x20, %rdx
50 /*
51 * We gobble 32byts forward in each loop.
52 */
535:
54 sub $0x20, %rdx
55 movq 0*8(%rsi), %r11
56 movq 1*8(%rsi), %r10
57 movq 2*8(%rsi), %r9
58 movq 3*8(%rsi), %r8
59 leaq 4*8(%rsi), %rsi
60
61 movq %r11, 0*8(%rdi)
62 movq %r10, 1*8(%rdi)
63 movq %r9, 2*8(%rdi)
64 movq %r8, 3*8(%rdi)
65 leaq 4*8(%rdi), %rdi
66 jae 5b
67 addq $0x20, %rdx
68 jmp 1f
69 /*
70 * Handle data forward by movsq.
71 */
72 .p2align 4
734:
74 movq %rdx, %rcx
75 movq -8(%rsi, %rdx), %r11
76 lea -8(%rdi, %rdx), %r10
77 shrq $3, %rcx
78 rep movsq
79 movq %r11, (%r10)
80 jmp 13f
81 /*
82 * Handle data backward by movsq.
83 */
84 .p2align 4
857:
86 movq %rdx, %rcx
87 movq (%rsi), %r11
88 movq %rdi, %r10
89 leaq -8(%rsi, %rdx), %rsi
90 leaq -8(%rdi, %rdx), %rdi
91 shrq $3, %rcx
92 std
93 rep movsq
94 cld
95 movq %r11, (%r10)
96 jmp 13f
97
98 /*
99 * Start to prepare for backward copy.
100 */
101 .p2align 4
1022:
103 cmp $680, %rdx
104 jb 6f
105 cmp %dil, %sil
106 je 7b
1076:
108 /*
109 * Calculate copy position to tail.
110 */
111 addq %rdx, %rsi
112 addq %rdx, %rdi
113 subq $0x20, %rdx
114 /*
115 * We gobble 32byts backward in each loop.
116 */
1178:
118 subq $0x20, %rdx
119 movq -1*8(%rsi), %r11
120 movq -2*8(%rsi), %r10
121 movq -3*8(%rsi), %r9
122 movq -4*8(%rsi), %r8
123 leaq -4*8(%rsi), %rsi
124
125 movq %r11, -1*8(%rdi)
126 movq %r10, -2*8(%rdi)
127 movq %r9, -3*8(%rdi)
128 movq %r8, -4*8(%rdi)
129 leaq -4*8(%rdi), %rdi
130 jae 8b
131 /*
132 * Calculate copy position to head.
133 */
134 addq $0x20, %rdx
135 subq %rdx, %rsi
136 subq %rdx, %rdi
1371:
138 cmpq $16, %rdx
139 jb 9f
140 /*
141 * Move data from 16 bytes to 31 bytes.
142 */
143 movq 0*8(%rsi), %r11
144 movq 1*8(%rsi), %r10
145 movq -2*8(%rsi, %rdx), %r9
146 movq -1*8(%rsi, %rdx), %r8
147 movq %r11, 0*8(%rdi)
148 movq %r10, 1*8(%rdi)
149 movq %r9, -2*8(%rdi, %rdx)
150 movq %r8, -1*8(%rdi, %rdx)
151 jmp 13f
152 .p2align 4
1539:
154 cmpq $8, %rdx
155 jb 10f
156 /*
157 * Move data from 8 bytes to 15 bytes.
158 */
159 movq 0*8(%rsi), %r11
160 movq -1*8(%rsi, %rdx), %r10
161 movq %r11, 0*8(%rdi)
162 movq %r10, -1*8(%rdi, %rdx)
163 jmp 13f
16410:
165 cmpq $4, %rdx
166 jb 11f
167 /*
168 * Move data from 4 bytes to 7 bytes.
169 */
170 movl (%rsi), %r11d
171 movl -4(%rsi, %rdx), %r10d
172 movl %r11d, (%rdi)
173 movl %r10d, -4(%rdi, %rdx)
174 jmp 13f
17511:
176 cmp $2, %rdx
177 jb 12f
178 /*
179 * Move data from 2 bytes to 3 bytes.
180 */
181 movw (%rsi), %r11w
182 movw -2(%rsi, %rdx), %r10w
183 movw %r11w, (%rdi)
184 movw %r10w, -2(%rdi, %rdx)
185 jmp 13f
18612:
187 cmp $1, %rdx
188 jb 13f
189 /*
190 * Move data for 1 byte.
191 */
192 movb (%rsi), %r11b
193 movb %r11b, (%rdi)
19413:
195 retq
196 CFI_ENDPROC
197ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 6d0f0ec41b34..000000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,192 +0,0 @@
1/* Normally compiler builtins are used, but sometimes the compiler calls out
2 of line code. Based on asm-i386/string.h.
3 */
4#define _STRING_C
5#include <linux/string.h>
6#include <linux/module.h>
7
8#undef memmove
9void *memmove(void *dest, const void *src, size_t count)
10{
11 unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
12 char *ret;
13
14 __asm__ __volatile__(
15 /* Handle more 32bytes in loop */
16 "mov %2, %3\n\t"
17 "cmp $0x20, %0\n\t"
18 "jb 1f\n\t"
19
20 /* Decide forward/backward copy mode */
21 "cmp %2, %1\n\t"
22 "jb 2f\n\t"
23
24 /*
25 * movsq instruction have many startup latency
26 * so we handle small size by general register.
27 */
28 "cmp $680, %0\n\t"
29 "jb 3f\n\t"
30 /*
31 * movsq instruction is only good for aligned case.
32 */
33 "cmpb %%dil, %%sil\n\t"
34 "je 4f\n\t"
35 "3:\n\t"
36 "sub $0x20, %0\n\t"
37 /*
38 * We gobble 32byts forward in each loop.
39 */
40 "5:\n\t"
41 "sub $0x20, %0\n\t"
42 "movq 0*8(%1), %4\n\t"
43 "movq 1*8(%1), %5\n\t"
44 "movq 2*8(%1), %6\n\t"
45 "movq 3*8(%1), %7\n\t"
46 "leaq 4*8(%1), %1\n\t"
47
48 "movq %4, 0*8(%2)\n\t"
49 "movq %5, 1*8(%2)\n\t"
50 "movq %6, 2*8(%2)\n\t"
51 "movq %7, 3*8(%2)\n\t"
52 "leaq 4*8(%2), %2\n\t"
53 "jae 5b\n\t"
54 "addq $0x20, %0\n\t"
55 "jmp 1f\n\t"
56 /*
57 * Handle data forward by movsq.
58 */
59 ".p2align 4\n\t"
60 "4:\n\t"
61 "movq %0, %8\n\t"
62 "movq -8(%1, %0), %4\n\t"
63 "lea -8(%2, %0), %5\n\t"
64 "shrq $3, %8\n\t"
65 "rep movsq\n\t"
66 "movq %4, (%5)\n\t"
67 "jmp 13f\n\t"
68 /*
69 * Handle data backward by movsq.
70 */
71 ".p2align 4\n\t"
72 "7:\n\t"
73 "movq %0, %8\n\t"
74 "movq (%1), %4\n\t"
75 "movq %2, %5\n\t"
76 "leaq -8(%1, %0), %1\n\t"
77 "leaq -8(%2, %0), %2\n\t"
78 "shrq $3, %8\n\t"
79 "std\n\t"
80 "rep movsq\n\t"
81 "cld\n\t"
82 "movq %4, (%5)\n\t"
83 "jmp 13f\n\t"
84
85 /*
86 * Start to prepare for backward copy.
87 */
88 ".p2align 4\n\t"
89 "2:\n\t"
90 "cmp $680, %0\n\t"
91 "jb 6f \n\t"
92 "cmp %%dil, %%sil\n\t"
93 "je 7b \n\t"
94 "6:\n\t"
95 /*
96 * Calculate copy position to tail.
97 */
98 "addq %0, %1\n\t"
99 "addq %0, %2\n\t"
100 "subq $0x20, %0\n\t"
101 /*
102 * We gobble 32byts backward in each loop.
103 */
104 "8:\n\t"
105 "subq $0x20, %0\n\t"
106 "movq -1*8(%1), %4\n\t"
107 "movq -2*8(%1), %5\n\t"
108 "movq -3*8(%1), %6\n\t"
109 "movq -4*8(%1), %7\n\t"
110 "leaq -4*8(%1), %1\n\t"
111
112 "movq %4, -1*8(%2)\n\t"
113 "movq %5, -2*8(%2)\n\t"
114 "movq %6, -3*8(%2)\n\t"
115 "movq %7, -4*8(%2)\n\t"
116 "leaq -4*8(%2), %2\n\t"
117 "jae 8b\n\t"
118 /*
119 * Calculate copy position to head.
120 */
121 "addq $0x20, %0\n\t"
122 "subq %0, %1\n\t"
123 "subq %0, %2\n\t"
124 "1:\n\t"
125 "cmpq $16, %0\n\t"
126 "jb 9f\n\t"
127 /*
128 * Move data from 16 bytes to 31 bytes.
129 */
130 "movq 0*8(%1), %4\n\t"
131 "movq 1*8(%1), %5\n\t"
132 "movq -2*8(%1, %0), %6\n\t"
133 "movq -1*8(%1, %0), %7\n\t"
134 "movq %4, 0*8(%2)\n\t"
135 "movq %5, 1*8(%2)\n\t"
136 "movq %6, -2*8(%2, %0)\n\t"
137 "movq %7, -1*8(%2, %0)\n\t"
138 "jmp 13f\n\t"
139 ".p2align 4\n\t"
140 "9:\n\t"
141 "cmpq $8, %0\n\t"
142 "jb 10f\n\t"
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 "movq 0*8(%1), %4\n\t"
147 "movq -1*8(%1, %0), %5\n\t"
148 "movq %4, 0*8(%2)\n\t"
149 "movq %5, -1*8(%2, %0)\n\t"
150 "jmp 13f\n\t"
151 "10:\n\t"
152 "cmpq $4, %0\n\t"
153 "jb 11f\n\t"
154 /*
155 * Move data from 4 bytes to 7 bytes.
156 */
157 "movl (%1), %4d\n\t"
158 "movl -4(%1, %0), %5d\n\t"
159 "movl %4d, (%2)\n\t"
160 "movl %5d, -4(%2, %0)\n\t"
161 "jmp 13f\n\t"
162 "11:\n\t"
163 "cmp $2, %0\n\t"
164 "jb 12f\n\t"
165 /*
166 * Move data from 2 bytes to 3 bytes.
167 */
168 "movw (%1), %4w\n\t"
169 "movw -2(%1, %0), %5w\n\t"
170 "movw %4w, (%2)\n\t"
171 "movw %5w, -2(%2, %0)\n\t"
172 "jmp 13f\n\t"
173 "12:\n\t"
174 "cmp $1, %0\n\t"
175 "jb 13f\n\t"
176 /*
177 * Move data for 1 byte.
178 */
179 "movb (%1), %4b\n\t"
180 "movb %4b, (%2)\n\t"
181 "13:\n\t"
182 : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
183 "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
184 :"0" (count),
185 "1" (src),
186 "2" (dest)
187 :"memory");
188
189 return ret;
190
191}
192EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 41fcf00e49df..67743977398b 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -23,43 +23,50 @@
23#include <asm/dwarf2.h> 23#include <asm/dwarf2.h>
24 24
25#define save_common_regs \ 25#define save_common_regs \
26 pushq %rdi; \ 26 pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
27 pushq %rsi; \ 27 pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
28 pushq %rcx; \ 28 pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
29 pushq %r8; \ 29 pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
30 pushq %r9; \ 30 pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
31 pushq %r10; \ 31 pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
32 pushq %r11 32 pushq_cfi %r11; CFI_REL_OFFSET r11, 0
33 33
34#define restore_common_regs \ 34#define restore_common_regs \
35 popq %r11; \ 35 popq_cfi %r11; CFI_RESTORE r11; \
36 popq %r10; \ 36 popq_cfi %r10; CFI_RESTORE r10; \
37 popq %r9; \ 37 popq_cfi %r9; CFI_RESTORE r9; \
38 popq %r8; \ 38 popq_cfi %r8; CFI_RESTORE r8; \
39 popq %rcx; \ 39 popq_cfi %rcx; CFI_RESTORE rcx; \
40 popq %rsi; \ 40 popq_cfi %rsi; CFI_RESTORE rsi; \
41 popq %rdi 41 popq_cfi %rdi; CFI_RESTORE rdi
42 42
43/* Fix up special calling conventions */ 43/* Fix up special calling conventions */
44ENTRY(call_rwsem_down_read_failed) 44ENTRY(call_rwsem_down_read_failed)
45 CFI_STARTPROC
45 save_common_regs 46 save_common_regs
46 pushq %rdx 47 pushq_cfi %rdx
48 CFI_REL_OFFSET rdx, 0
47 movq %rax,%rdi 49 movq %rax,%rdi
48 call rwsem_down_read_failed 50 call rwsem_down_read_failed
49 popq %rdx 51 popq_cfi %rdx
52 CFI_RESTORE rdx
50 restore_common_regs 53 restore_common_regs
51 ret 54 ret
52 ENDPROC(call_rwsem_down_read_failed) 55 CFI_ENDPROC
56ENDPROC(call_rwsem_down_read_failed)
53 57
54ENTRY(call_rwsem_down_write_failed) 58ENTRY(call_rwsem_down_write_failed)
59 CFI_STARTPROC
55 save_common_regs 60 save_common_regs
56 movq %rax,%rdi 61 movq %rax,%rdi
57 call rwsem_down_write_failed 62 call rwsem_down_write_failed
58 restore_common_regs 63 restore_common_regs
59 ret 64 ret
60 ENDPROC(call_rwsem_down_write_failed) 65 CFI_ENDPROC
66ENDPROC(call_rwsem_down_write_failed)
61 67
62ENTRY(call_rwsem_wake) 68ENTRY(call_rwsem_wake)
69 CFI_STARTPROC
63 decl %edx /* do nothing if still outstanding active readers */ 70 decl %edx /* do nothing if still outstanding active readers */
64 jnz 1f 71 jnz 1f
65 save_common_regs 72 save_common_regs
@@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake)
67 call rwsem_wake 74 call rwsem_wake
68 restore_common_regs 75 restore_common_regs
691: ret 761: ret
70 ENDPROC(call_rwsem_wake) 77 CFI_ENDPROC
78ENDPROC(call_rwsem_wake)
71 79
72/* Fix up special calling conventions */ 80/* Fix up special calling conventions */
73ENTRY(call_rwsem_downgrade_wake) 81ENTRY(call_rwsem_downgrade_wake)
82 CFI_STARTPROC
74 save_common_regs 83 save_common_regs
75 pushq %rdx 84 pushq_cfi %rdx
85 CFI_REL_OFFSET rdx, 0
76 movq %rax,%rdi 86 movq %rax,%rdi
77 call rwsem_downgrade_wake 87 call rwsem_downgrade_wake
78 popq %rdx 88 popq_cfi %rdx
89 CFI_RESTORE rdx
79 restore_common_regs 90 restore_common_regs
80 ret 91 ret
81 ENDPROC(call_rwsem_downgrade_wake) 92 CFI_ENDPROC
93ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 648fe4741782..06691daa4108 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -36,7 +36,7 @@
36 */ 36 */
37#ifdef CONFIG_SMP 37#ifdef CONFIG_SMP
38ENTRY(__write_lock_failed) 38ENTRY(__write_lock_failed)
39 CFI_STARTPROC simple 39 CFI_STARTPROC
40 FRAME 40 FRAME
412: LOCK_PREFIX 412: LOCK_PREFIX
42 addl $ RW_LOCK_BIAS,(%eax) 42 addl $ RW_LOCK_BIAS,(%eax)
@@ -74,29 +74,23 @@ ENTRY(__read_lock_failed)
74/* Fix up special calling conventions */ 74/* Fix up special calling conventions */
75ENTRY(call_rwsem_down_read_failed) 75ENTRY(call_rwsem_down_read_failed)
76 CFI_STARTPROC 76 CFI_STARTPROC
77 push %ecx 77 pushl_cfi %ecx
78 CFI_ADJUST_CFA_OFFSET 4
79 CFI_REL_OFFSET ecx,0 78 CFI_REL_OFFSET ecx,0
80 push %edx 79 pushl_cfi %edx
81 CFI_ADJUST_CFA_OFFSET 4
82 CFI_REL_OFFSET edx,0 80 CFI_REL_OFFSET edx,0
83 call rwsem_down_read_failed 81 call rwsem_down_read_failed
84 pop %edx 82 popl_cfi %edx
85 CFI_ADJUST_CFA_OFFSET -4 83 popl_cfi %ecx
86 pop %ecx
87 CFI_ADJUST_CFA_OFFSET -4
88 ret 84 ret
89 CFI_ENDPROC 85 CFI_ENDPROC
90 ENDPROC(call_rwsem_down_read_failed) 86 ENDPROC(call_rwsem_down_read_failed)
91 87
92ENTRY(call_rwsem_down_write_failed) 88ENTRY(call_rwsem_down_write_failed)
93 CFI_STARTPROC 89 CFI_STARTPROC
94 push %ecx 90 pushl_cfi %ecx
95 CFI_ADJUST_CFA_OFFSET 4
96 CFI_REL_OFFSET ecx,0 91 CFI_REL_OFFSET ecx,0
97 calll rwsem_down_write_failed 92 calll rwsem_down_write_failed
98 pop %ecx 93 popl_cfi %ecx
99 CFI_ADJUST_CFA_OFFSET -4
100 ret 94 ret
101 CFI_ENDPROC 95 CFI_ENDPROC
102 ENDPROC(call_rwsem_down_write_failed) 96 ENDPROC(call_rwsem_down_write_failed)
@@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake)
105 CFI_STARTPROC 99 CFI_STARTPROC
106 decw %dx /* do nothing if still outstanding active readers */ 100 decw %dx /* do nothing if still outstanding active readers */
107 jnz 1f 101 jnz 1f
108 push %ecx 102 pushl_cfi %ecx
109 CFI_ADJUST_CFA_OFFSET 4
110 CFI_REL_OFFSET ecx,0 103 CFI_REL_OFFSET ecx,0
111 call rwsem_wake 104 call rwsem_wake
112 pop %ecx 105 popl_cfi %ecx
113 CFI_ADJUST_CFA_OFFSET -4
1141: ret 1061: ret
115 CFI_ENDPROC 107 CFI_ENDPROC
116 ENDPROC(call_rwsem_wake) 108 ENDPROC(call_rwsem_wake)
@@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake)
118/* Fix up special calling conventions */ 110/* Fix up special calling conventions */
119ENTRY(call_rwsem_downgrade_wake) 111ENTRY(call_rwsem_downgrade_wake)
120 CFI_STARTPROC 112 CFI_STARTPROC
121 push %ecx 113 pushl_cfi %ecx
122 CFI_ADJUST_CFA_OFFSET 4
123 CFI_REL_OFFSET ecx,0 114 CFI_REL_OFFSET ecx,0
124 push %edx 115 pushl_cfi %edx
125 CFI_ADJUST_CFA_OFFSET 4
126 CFI_REL_OFFSET edx,0 116 CFI_REL_OFFSET edx,0
127 call rwsem_downgrade_wake 117 call rwsem_downgrade_wake
128 pop %edx 118 popl_cfi %edx
129 CFI_ADJUST_CFA_OFFSET -4 119 popl_cfi %ecx
130 pop %ecx
131 CFI_ADJUST_CFA_OFFSET -4
132 ret 120 ret
133 CFI_ENDPROC 121 CFI_ENDPROC
134 ENDPROC(call_rwsem_downgrade_wake) 122 ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 650b11e00ecc..2930ae05d773 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -7,24 +7,6 @@
7 7
8 #include <linux/linkage.h> 8 #include <linux/linkage.h>
9 9
10#define ARCH_TRACE_IRQS_ON \
11 pushl %eax; \
12 pushl %ecx; \
13 pushl %edx; \
14 call trace_hardirqs_on; \
15 popl %edx; \
16 popl %ecx; \
17 popl %eax;
18
19#define ARCH_TRACE_IRQS_OFF \
20 pushl %eax; \
21 pushl %ecx; \
22 pushl %edx; \
23 call trace_hardirqs_off; \
24 popl %edx; \
25 popl %ecx; \
26 popl %eax;
27
28#ifdef CONFIG_TRACE_IRQFLAGS 10#ifdef CONFIG_TRACE_IRQFLAGS
29 /* put return address in eax (arg1) */ 11 /* put return address in eax (arg1) */
30 .macro thunk_ra name,func 12 .macro thunk_ra name,func
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index bf9a7d5a5428..782b082c9ff7 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -22,26 +22,6 @@
22 CFI_ENDPROC 22 CFI_ENDPROC
23 .endm 23 .endm
24 24
25 /* rdi: arg1 ... normal C conventions. rax is passed from C. */
26 .macro thunk_retrax name,func
27 .globl \name
28\name:
29 CFI_STARTPROC
30 SAVE_ARGS
31 call \func
32 jmp restore_norax
33 CFI_ENDPROC
34 .endm
35
36
37 .section .sched.text, "ax"
38#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
39 thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
40 thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
41 thunk rwsem_wake_thunk,rwsem_wake
42 thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
43#endif
44
45#ifdef CONFIG_TRACE_IRQFLAGS 25#ifdef CONFIG_TRACE_IRQFLAGS
46 /* put return address in rdi (arg1) */ 26 /* put return address in rdi (arg1) */
47 .macro thunk_ra name,func 27 .macro thunk_ra name,func
@@ -72,10 +52,3 @@ restore:
72 RESTORE_ARGS 52 RESTORE_ARGS
73 ret 53 ret
74 CFI_ENDPROC 54 CFI_ENDPROC
75
76 CFI_STARTPROC
77 SAVE_ARGS
78restore_norax:
79 RESTORE_ARGS 1
80 ret
81 CFI_ENDPROC
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 09df2f9a3d69..3e608edf9958 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o 25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
26obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o 26obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
28obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
28 29
29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 30obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
30 31
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c435ed..0919c26820d4 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -26,9 +26,7 @@
26#include <asm/apic.h> 26#include <asm/apic.h>
27#include <asm/amd_nb.h> 27#include <asm/amd_nb.h>
28 28
29static struct bootnode __initdata nodes[8];
30static unsigned char __initdata nodeids[8]; 29static unsigned char __initdata nodeids[8];
31static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
32 30
33static __init int find_northbridge(void) 31static __init int find_northbridge(void)
34{ 32{
@@ -51,7 +49,7 @@ static __init int find_northbridge(void)
51 return num; 49 return num;
52 } 50 }
53 51
54 return -1; 52 return -ENOENT;
55} 53}
56 54
57static __init void early_get_boot_cpu_id(void) 55static __init void early_get_boot_cpu_id(void)
@@ -69,17 +67,18 @@ static __init void early_get_boot_cpu_id(void)
69#endif 67#endif
70} 68}
71 69
72int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) 70int __init amd_numa_init(void)
73{ 71{
74 unsigned long start = PFN_PHYS(start_pfn); 72 unsigned long start = PFN_PHYS(0);
75 unsigned long end = PFN_PHYS(end_pfn); 73 unsigned long end = PFN_PHYS(max_pfn);
76 unsigned numnodes; 74 unsigned numnodes;
77 unsigned long prevbase; 75 unsigned long prevbase;
78 int i, nb, found = 0; 76 int i, j, nb;
79 u32 nodeid, reg; 77 u32 nodeid, reg;
78 unsigned int bits, cores, apicid_base;
80 79
81 if (!early_pci_allowed()) 80 if (!early_pci_allowed())
82 return -1; 81 return -EINVAL;
83 82
84 nb = find_northbridge(); 83 nb = find_northbridge();
85 if (nb < 0) 84 if (nb < 0)
@@ -90,7 +89,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
90 reg = read_pci_config(0, nb, 0, 0x60); 89 reg = read_pci_config(0, nb, 0, 0x60);
91 numnodes = ((reg >> 4) & 0xF) + 1; 90 numnodes = ((reg >> 4) & 0xF) + 1;
92 if (numnodes <= 1) 91 if (numnodes <= 1)
93 return -1; 92 return -ENOENT;
94 93
95 pr_info("Number of physical nodes %d\n", numnodes); 94 pr_info("Number of physical nodes %d\n", numnodes);
96 95
@@ -121,9 +120,9 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
121 if ((base >> 8) & 3 || (limit >> 8) & 3) { 120 if ((base >> 8) & 3 || (limit >> 8) & 3) {
122 pr_err("Node %d using interleaving mode %lx/%lx\n", 121 pr_err("Node %d using interleaving mode %lx/%lx\n",
123 nodeid, (base >> 8) & 3, (limit >> 8) & 3); 122 nodeid, (base >> 8) & 3, (limit >> 8) & 3);
124 return -1; 123 return -EINVAL;
125 } 124 }
126 if (node_isset(nodeid, nodes_parsed)) { 125 if (node_isset(nodeid, numa_nodes_parsed)) {
127 pr_info("Node %d already present, skipping\n", 126 pr_info("Node %d already present, skipping\n",
128 nodeid); 127 nodeid);
129 continue; 128 continue;
@@ -160,117 +159,28 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
160 if (prevbase > base) { 159 if (prevbase > base) {
161 pr_err("Node map not sorted %lx,%lx\n", 160 pr_err("Node map not sorted %lx,%lx\n",
162 prevbase, base); 161 prevbase, base);
163 return -1; 162 return -EINVAL;
164 } 163 }
165 164
166 pr_info("Node %d MemBase %016lx Limit %016lx\n", 165 pr_info("Node %d MemBase %016lx Limit %016lx\n",
167 nodeid, base, limit); 166 nodeid, base, limit);
168 167
169 found++;
170
171 nodes[nodeid].start = base;
172 nodes[nodeid].end = limit;
173
174 prevbase = base; 168 prevbase = base;
175 169 numa_add_memblk(nodeid, base, limit);
176 node_set(nodeid, nodes_parsed); 170 node_set(nodeid, numa_nodes_parsed);
177 }
178
179 if (!found)
180 return -1;
181 return 0;
182}
183
184#ifdef CONFIG_NUMA_EMU
185static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
186 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
187};
188
189void __init amd_get_nodes(struct bootnode *physnodes)
190{
191 int i;
192
193 for_each_node_mask(i, nodes_parsed) {
194 physnodes[i].start = nodes[i].start;
195 physnodes[i].end = nodes[i].end;
196 } 171 }
197}
198
199static int __init find_node_by_addr(unsigned long addr)
200{
201 int ret = NUMA_NO_NODE;
202 int i;
203
204 for (i = 0; i < 8; i++)
205 if (addr >= nodes[i].start && addr < nodes[i].end) {
206 ret = i;
207 break;
208 }
209 return ret;
210}
211 172
212/* 173 if (!nodes_weight(numa_nodes_parsed))
213 * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be 174 return -ENOENT;
214 * setup to represent the physical topology but reflect the emulated
215 * environment. For each emulated node, the real node which it appears on is
216 * found and a fake pxm to nid mapping is created which mirrors the actual
217 * locality. node_distance() then represents the correct distances between
218 * emulated nodes by using the fake acpi mappings to pxms.
219 */
220void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
221{
222 unsigned int bits;
223 unsigned int cores;
224 unsigned int apicid_base = 0;
225 int i;
226 175
176 /*
177 * We seem to have valid NUMA configuration. Map apicids to nodes
178 * using the coreid bits from early_identify_cpu.
179 */
227 bits = boot_cpu_data.x86_coreid_bits; 180 bits = boot_cpu_data.x86_coreid_bits;
228 cores = 1 << bits; 181 cores = 1 << bits;
229 early_get_boot_cpu_id();
230 if (boot_cpu_physical_apicid > 0)
231 apicid_base = boot_cpu_physical_apicid;
232
233 for (i = 0; i < nr_nodes; i++) {
234 int index;
235 int nid;
236 int j;
237
238 nid = find_node_by_addr(nodes[i].start);
239 if (nid == NUMA_NO_NODE)
240 continue;
241
242 index = nodeids[nid] << bits;
243 if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
244 for (j = apicid_base; j < cores + apicid_base; j++)
245 fake_apicid_to_node[index + j] = i;
246#ifdef CONFIG_ACPI_NUMA
247 __acpi_map_pxm_to_node(nid, i);
248#endif
249 }
250 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
251}
252#endif /* CONFIG_NUMA_EMU */
253
254int __init amd_scan_nodes(void)
255{
256 unsigned int bits;
257 unsigned int cores;
258 unsigned int apicid_base;
259 int i;
260
261 BUG_ON(nodes_empty(nodes_parsed));
262 node_possible_map = nodes_parsed;
263 memnode_shift = compute_hash_shift(nodes, 8, NULL);
264 if (memnode_shift < 0) {
265 pr_err("No NUMA node hash function found. Contact maintainer\n");
266 return -1;
267 }
268 pr_info("Using node hash shift of %d\n", memnode_shift);
269
270 /* use the coreid bits from early_identify_cpu */
271 bits = boot_cpu_data.x86_coreid_bits;
272 cores = (1<<bits);
273 apicid_base = 0; 182 apicid_base = 0;
183
274 /* get the APIC ID of the BSP early for systems with apicid lifting */ 184 /* get the APIC ID of the BSP early for systems with apicid lifting */
275 early_get_boot_cpu_id(); 185 early_get_boot_cpu_id();
276 if (boot_cpu_physical_apicid > 0) { 186 if (boot_cpu_physical_apicid > 0) {
@@ -278,17 +188,9 @@ int __init amd_scan_nodes(void)
278 apicid_base = boot_cpu_physical_apicid; 188 apicid_base = boot_cpu_physical_apicid;
279 } 189 }
280 190
281 for_each_node_mask(i, node_possible_map) { 191 for_each_node_mask(i, numa_nodes_parsed)
282 int j;
283
284 memblock_x86_register_active_regions(i,
285 nodes[i].start >> PAGE_SHIFT,
286 nodes[i].end >> PAGE_SHIFT);
287 for (j = apicid_base; j < cores + apicid_base; j++) 192 for (j = apicid_base; j < cores + apicid_base; j++)
288 apicid_to_node[(i << bits) + j] = i; 193 set_apicid_to_node((i << bits) + j, i);
289 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
290 }
291 194
292 numa_init_array();
293 return 0; 195 return 0;
294} 196}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a4..20e3f8702d1e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
229 for (address = VMALLOC_START & PMD_MASK; 229 for (address = VMALLOC_START & PMD_MASK;
230 address >= TASK_SIZE && address < FIXADDR_TOP; 230 address >= TASK_SIZE && address < FIXADDR_TOP;
231 address += PMD_SIZE) { 231 address += PMD_SIZE) {
232
233 unsigned long flags;
234 struct page *page; 232 struct page *page;
235 233
236 spin_lock_irqsave(&pgd_lock, flags); 234 spin_lock(&pgd_lock);
237 list_for_each_entry(page, &pgd_list, lru) { 235 list_for_each_entry(page, &pgd_list, lru) {
238 spinlock_t *pgt_lock; 236 spinlock_t *pgt_lock;
239 pmd_t *ret; 237 pmd_t *ret;
240 238
239 /* the pgt_lock only for Xen */
241 pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 240 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
242 241
243 spin_lock(pgt_lock); 242 spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
247 if (!ret) 246 if (!ret)
248 break; 247 break;
249 } 248 }
250 spin_unlock_irqrestore(&pgd_lock, flags); 249 spin_unlock(&pgd_lock);
251 } 250 }
252} 251}
253 252
@@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
828 unsigned long address, unsigned int fault) 827 unsigned long address, unsigned int fault)
829{ 828{
830 if (fault & VM_FAULT_OOM) { 829 if (fault & VM_FAULT_OOM) {
830 /* Kernel mode? Handle exceptions or die: */
831 if (!(error_code & PF_USER)) {
832 up_read(&current->mm->mmap_sem);
833 no_context(regs, error_code, address);
834 return;
835 }
836
831 out_of_memory(regs, error_code, address); 837 out_of_memory(regs, error_code, address);
832 } else { 838 } else {
833 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 839 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 947f42abe820..286d289b039b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,9 +18,9 @@
18 18
19DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 19DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
20 20
21unsigned long __initdata e820_table_start; 21unsigned long __initdata pgt_buf_start;
22unsigned long __meminitdata e820_table_end; 22unsigned long __meminitdata pgt_buf_end;
23unsigned long __meminitdata e820_table_top; 23unsigned long __meminitdata pgt_buf_top;
24 24
25int after_bootmem; 25int after_bootmem;
26 26
@@ -33,7 +33,7 @@ int direct_gbpages
33static void __init find_early_table_space(unsigned long end, int use_pse, 33static void __init find_early_table_space(unsigned long end, int use_pse,
34 int use_gbpages) 34 int use_gbpages)
35{ 35{
36 unsigned long puds, pmds, ptes, tables, start; 36 unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
37 phys_addr_t base; 37 phys_addr_t base;
38 38
39 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 39 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -65,29 +65,20 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
65#ifdef CONFIG_X86_32 65#ifdef CONFIG_X86_32
66 /* for fixmap */ 66 /* for fixmap */
67 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); 67 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
68#endif
69 68
70 /* 69 good_end = max_pfn_mapped << PAGE_SHIFT;
71 * RED-PEN putting page tables only on node 0 could
72 * cause a hotspot and fill up ZONE_DMA. The page tables
73 * need roughly 0.5KB per GB.
74 */
75#ifdef CONFIG_X86_32
76 start = 0x7000;
77#else
78 start = 0x8000;
79#endif 70#endif
80 base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT, 71
81 tables, PAGE_SIZE); 72 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
82 if (base == MEMBLOCK_ERROR) 73 if (base == MEMBLOCK_ERROR)
83 panic("Cannot find space for the kernel page tables"); 74 panic("Cannot find space for the kernel page tables");
84 75
85 e820_table_start = base >> PAGE_SHIFT; 76 pgt_buf_start = base >> PAGE_SHIFT;
86 e820_table_end = e820_table_start; 77 pgt_buf_end = pgt_buf_start;
87 e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); 78 pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
88 79
89 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", 80 printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
90 end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); 81 end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
91} 82}
92 83
93struct map_range { 84struct map_range {
@@ -279,30 +270,11 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
279 load_cr3(swapper_pg_dir); 270 load_cr3(swapper_pg_dir);
280#endif 271#endif
281 272
282#ifdef CONFIG_X86_64
283 if (!after_bootmem && !start) {
284 pud_t *pud;
285 pmd_t *pmd;
286
287 mmu_cr4_features = read_cr4();
288
289 /*
290 * _brk_end cannot change anymore, but it and _end may be
291 * located on different 2M pages. cleanup_highmap(), however,
292 * can only consider _end when it runs, so destroy any
293 * mappings beyond _brk_end here.
294 */
295 pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
296 pmd = pmd_offset(pud, _brk_end - 1);
297 while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
298 pmd_clear(pmd);
299 }
300#endif
301 __flush_tlb_all(); 273 __flush_tlb_all();
302 274
303 if (!after_bootmem && e820_table_end > e820_table_start) 275 if (!after_bootmem && pgt_buf_end > pgt_buf_start)
304 memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT, 276 memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
305 e820_table_end << PAGE_SHIFT, "PGTABLE"); 277 pgt_buf_end << PAGE_SHIFT, "PGTABLE");
306 278
307 if (!after_bootmem) 279 if (!after_bootmem)
308 early_memtest(start, end); 280 early_memtest(start, end);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c821074b7f0b..73ad7ebd6e9c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -62,10 +62,10 @@ bool __read_mostly __vmalloc_start_set = false;
62 62
63static __init void *alloc_low_page(void) 63static __init void *alloc_low_page(void)
64{ 64{
65 unsigned long pfn = e820_table_end++; 65 unsigned long pfn = pgt_buf_end++;
66 void *adr; 66 void *adr;
67 67
68 if (pfn >= e820_table_top) 68 if (pfn >= pgt_buf_top)
69 panic("alloc_low_page: ran out of memory"); 69 panic("alloc_low_page: ran out of memory");
70 70
71 adr = __va(pfn * PAGE_SIZE); 71 adr = __va(pfn * PAGE_SIZE);
@@ -163,8 +163,8 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
163 if (pmd_idx_kmap_begin != pmd_idx_kmap_end 163 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
164 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin 164 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
165 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end 165 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
166 && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start 166 && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
167 || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { 167 || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
168 pte_t *newpte; 168 pte_t *newpte;
169 int i; 169 int i;
170 170
@@ -644,8 +644,7 @@ void __init find_low_pfn_range(void)
644} 644}
645 645
646#ifndef CONFIG_NEED_MULTIPLE_NODES 646#ifndef CONFIG_NEED_MULTIPLE_NODES
647void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 647void __init initmem_init(void)
648 int acpi, int k8)
649{ 648{
650#ifdef CONFIG_HIGHMEM 649#ifdef CONFIG_HIGHMEM
651 highstart_pfn = highend_pfn = max_pfn; 650 highstart_pfn = highend_pfn = max_pfn;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af80..a08a62cb136e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
105 105
106 for (address = start; address <= end; address += PGDIR_SIZE) { 106 for (address = start; address <= end; address += PGDIR_SIZE) {
107 const pgd_t *pgd_ref = pgd_offset_k(address); 107 const pgd_t *pgd_ref = pgd_offset_k(address);
108 unsigned long flags;
109 struct page *page; 108 struct page *page;
110 109
111 if (pgd_none(*pgd_ref)) 110 if (pgd_none(*pgd_ref))
112 continue; 111 continue;
113 112
114 spin_lock_irqsave(&pgd_lock, flags); 113 spin_lock(&pgd_lock);
115 list_for_each_entry(page, &pgd_list, lru) { 114 list_for_each_entry(page, &pgd_list, lru) {
116 pgd_t *pgd; 115 pgd_t *pgd;
117 spinlock_t *pgt_lock; 116 spinlock_t *pgt_lock;
118 117
119 pgd = (pgd_t *)page_address(page) + pgd_index(address); 118 pgd = (pgd_t *)page_address(page) + pgd_index(address);
119 /* the pgt_lock only for Xen */
120 pgt_lock = &pgd_page_get_mm(page)->page_table_lock; 120 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
121 spin_lock(pgt_lock); 121 spin_lock(pgt_lock);
122 122
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
128 128
129 spin_unlock(pgt_lock); 129 spin_unlock(pgt_lock);
130 } 130 }
131 spin_unlock_irqrestore(&pgd_lock, flags); 131 spin_unlock(&pgd_lock);
132 } 132 }
133} 133}
134 134
@@ -314,7 +314,7 @@ void __init cleanup_highmap(void)
314 314
315static __ref void *alloc_low_page(unsigned long *phys) 315static __ref void *alloc_low_page(unsigned long *phys)
316{ 316{
317 unsigned long pfn = e820_table_end++; 317 unsigned long pfn = pgt_buf_end++;
318 void *adr; 318 void *adr;
319 319
320 if (after_bootmem) { 320 if (after_bootmem) {
@@ -324,7 +324,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
324 return adr; 324 return adr;
325 } 325 }
326 326
327 if (pfn >= e820_table_top) 327 if (pfn >= pgt_buf_top)
328 panic("alloc_low_page: ran out of memory"); 328 panic("alloc_low_page: ran out of memory");
329 329
330 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); 330 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -333,12 +333,28 @@ static __ref void *alloc_low_page(unsigned long *phys)
333 return adr; 333 return adr;
334} 334}
335 335
336static __ref void *map_low_page(void *virt)
337{
338 void *adr;
339 unsigned long phys, left;
340
341 if (after_bootmem)
342 return virt;
343
344 phys = __pa(virt);
345 left = phys & (PAGE_SIZE - 1);
346 adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
347 adr = (void *)(((unsigned long)adr) | left);
348
349 return adr;
350}
351
336static __ref void unmap_low_page(void *adr) 352static __ref void unmap_low_page(void *adr)
337{ 353{
338 if (after_bootmem) 354 if (after_bootmem)
339 return; 355 return;
340 356
341 early_iounmap(adr, PAGE_SIZE); 357 early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
342} 358}
343 359
344static unsigned long __meminit 360static unsigned long __meminit
@@ -386,15 +402,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
386} 402}
387 403
388static unsigned long __meminit 404static unsigned long __meminit
389phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
390 pgprot_t prot)
391{
392 pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
393
394 return phys_pte_init(pte, address, end, prot);
395}
396
397static unsigned long __meminit
398phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, 405phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
399 unsigned long page_size_mask, pgprot_t prot) 406 unsigned long page_size_mask, pgprot_t prot)
400{ 407{
@@ -420,8 +427,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
420 if (pmd_val(*pmd)) { 427 if (pmd_val(*pmd)) {
421 if (!pmd_large(*pmd)) { 428 if (!pmd_large(*pmd)) {
422 spin_lock(&init_mm.page_table_lock); 429 spin_lock(&init_mm.page_table_lock);
423 last_map_addr = phys_pte_update(pmd, address, 430 pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
431 last_map_addr = phys_pte_init(pte, address,
424 end, prot); 432 end, prot);
433 unmap_low_page(pte);
425 spin_unlock(&init_mm.page_table_lock); 434 spin_unlock(&init_mm.page_table_lock);
426 continue; 435 continue;
427 } 436 }
@@ -468,18 +477,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
468} 477}
469 478
470static unsigned long __meminit 479static unsigned long __meminit
471phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
472 unsigned long page_size_mask, pgprot_t prot)
473{
474 pmd_t *pmd = pmd_offset(pud, 0);
475 unsigned long last_map_addr;
476
477 last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
478 __flush_tlb_all();
479 return last_map_addr;
480}
481
482static unsigned long __meminit
483phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, 480phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
484 unsigned long page_size_mask) 481 unsigned long page_size_mask)
485{ 482{
@@ -504,8 +501,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
504 501
505 if (pud_val(*pud)) { 502 if (pud_val(*pud)) {
506 if (!pud_large(*pud)) { 503 if (!pud_large(*pud)) {
507 last_map_addr = phys_pmd_update(pud, addr, end, 504 pmd = map_low_page(pmd_offset(pud, 0));
505 last_map_addr = phys_pmd_init(pmd, addr, end,
508 page_size_mask, prot); 506 page_size_mask, prot);
507 unmap_low_page(pmd);
508 __flush_tlb_all();
509 continue; 509 continue;
510 } 510 }
511 /* 511 /*
@@ -553,17 +553,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
553 return last_map_addr; 553 return last_map_addr;
554} 554}
555 555
556static unsigned long __meminit
557phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
558 unsigned long page_size_mask)
559{
560 pud_t *pud;
561
562 pud = (pud_t *)pgd_page_vaddr(*pgd);
563
564 return phys_pud_init(pud, addr, end, page_size_mask);
565}
566
567unsigned long __meminit 556unsigned long __meminit
568kernel_physical_mapping_init(unsigned long start, 557kernel_physical_mapping_init(unsigned long start,
569 unsigned long end, 558 unsigned long end,
@@ -587,8 +576,10 @@ kernel_physical_mapping_init(unsigned long start,
587 next = end; 576 next = end;
588 577
589 if (pgd_val(*pgd)) { 578 if (pgd_val(*pgd)) {
590 last_map_addr = phys_pud_update(pgd, __pa(start), 579 pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
580 last_map_addr = phys_pud_init(pud, __pa(start),
591 __pa(end), page_size_mask); 581 __pa(end), page_size_mask);
582 unmap_low_page(pud);
592 continue; 583 continue;
593 } 584 }
594 585
@@ -612,10 +603,9 @@ kernel_physical_mapping_init(unsigned long start,
612} 603}
613 604
614#ifndef CONFIG_NUMA 605#ifndef CONFIG_NUMA
615void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 606void __init initmem_init(void)
616 int acpi, int k8)
617{ 607{
618 memblock_x86_register_active_regions(0, start_pfn, end_pfn); 608 memblock_x86_register_active_regions(0, 0, max_pfn);
619} 609}
620#endif 610#endif
621 611
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d7887a38..9559d360fde7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt)
26early_param("numa", numa_setup); 26early_param("numa", numa_setup);
27 27
28/* 28/*
29 * Which logical CPUs are on which nodes 29 * apicid, cpu, node mappings
30 */ 30 */
31s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
32 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
33};
34
31cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; 35cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
32EXPORT_SYMBOL(node_to_cpumask_map); 36EXPORT_SYMBOL(node_to_cpumask_map);
33 37
34/* 38/*
39 * Map cpu index to node index
40 */
41DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
42EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
43
44void __cpuinit numa_set_node(int cpu, int node)
45{
46 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
47
48 /* early setting, no percpu area yet */
49 if (cpu_to_node_map) {
50 cpu_to_node_map[cpu] = node;
51 return;
52 }
53
54#ifdef CONFIG_DEBUG_PER_CPU_MAPS
55 if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
56 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
57 dump_stack();
58 return;
59 }
60#endif
61 per_cpu(x86_cpu_to_node_map, cpu) = node;
62
63 if (node != NUMA_NO_NODE)
64 set_cpu_numa_node(cpu, node);
65}
66
67void __cpuinit numa_clear_node(int cpu)
68{
69 numa_set_node(cpu, NUMA_NO_NODE);
70}
71
72/*
35 * Allocate node_to_cpumask_map based on number of available nodes 73 * Allocate node_to_cpumask_map based on number of available nodes
36 * Requires node_possible_map to be valid. 74 * Requires node_possible_map to be valid.
37 * 75 *
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void)
57 pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); 95 pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
58} 96}
59 97
60#ifdef CONFIG_DEBUG_PER_CPU_MAPS 98/*
99 * There are unfortunately some poorly designed mainboards around that
100 * only connect memory to a single CPU. This breaks the 1:1 cpu->node
101 * mapping. To avoid this fill in the mapping for all possible CPUs,
102 * as the number of CPUs is not known yet. We round robin the existing
103 * nodes.
104 */
105void __init numa_init_array(void)
106{
107 int rr, i;
108
109 rr = first_node(node_online_map);
110 for (i = 0; i < nr_cpu_ids; i++) {
111 if (early_cpu_to_node(i) != NUMA_NO_NODE)
112 continue;
113 numa_set_node(i, rr);
114 rr = next_node(rr, node_online_map);
115 if (rr == MAX_NUMNODES)
116 rr = first_node(node_online_map);
117 }
118}
119
120static __init int find_near_online_node(int node)
121{
122 int n, val;
123 int min_val = INT_MAX;
124 int best_node = -1;
125
126 for_each_online_node(n) {
127 val = node_distance(node, n);
128
129 if (val < min_val) {
130 min_val = val;
131 best_node = n;
132 }
133 }
134
135 return best_node;
136}
137
138/*
139 * Setup early cpu_to_node.
140 *
141 * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
142 * and apicid_to_node[] tables have valid entries for a CPU.
143 * This means we skip cpu_to_node[] initialisation for NUMA
144 * emulation and faking node case (when running a kernel compiled
145 * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
146 * is already initialized in a round robin manner at numa_init_array,
147 * prior to this call, and this initialization is good enough
148 * for the fake NUMA cases.
149 *
150 * Called before the per_cpu areas are setup.
151 */
152void __init init_cpu_to_node(void)
153{
154 int cpu;
155 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
156
157 BUG_ON(cpu_to_apicid == NULL);
158
159 for_each_possible_cpu(cpu) {
160 int node = numa_cpu_node(cpu);
161
162 if (node == NUMA_NO_NODE)
163 continue;
164 if (!node_online(node))
165 node = find_near_online_node(node);
166 numa_set_node(cpu, node);
167 }
168}
169
170#ifndef CONFIG_DEBUG_PER_CPU_MAPS
171
172# ifndef CONFIG_NUMA_EMU
173void __cpuinit numa_add_cpu(int cpu)
174{
175 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
176}
177
178void __cpuinit numa_remove_cpu(int cpu)
179{
180 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
181}
182# endif /* !CONFIG_NUMA_EMU */
183
184#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
185
186int __cpu_to_node(int cpu)
187{
188 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
189 printk(KERN_WARNING
190 "cpu_to_node(%d): usage too early!\n", cpu);
191 dump_stack();
192 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
193 }
194 return per_cpu(x86_cpu_to_node_map, cpu);
195}
196EXPORT_SYMBOL(__cpu_to_node);
197
198/*
199 * Same function as cpu_to_node() but used if called before the
200 * per_cpu areas are setup.
201 */
202int early_cpu_to_node(int cpu)
203{
204 if (early_per_cpu_ptr(x86_cpu_to_node_map))
205 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
206
207 if (!cpu_possible(cpu)) {
208 printk(KERN_WARNING
209 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
210 dump_stack();
211 return NUMA_NO_NODE;
212 }
213 return per_cpu(x86_cpu_to_node_map, cpu);
214}
215
216struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
217{
218 int node = early_cpu_to_node(cpu);
219 struct cpumask *mask;
220 char buf[64];
221
222 if (node == NUMA_NO_NODE) {
223 /* early_cpu_to_node() already emits a warning and trace */
224 return NULL;
225 }
226 mask = node_to_cpumask_map[node];
227 if (!mask) {
228 pr_err("node_to_cpumask_map[%i] NULL\n", node);
229 dump_stack();
230 return NULL;
231 }
232
233 cpulist_scnprintf(buf, sizeof(buf), mask);
234 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
235 enable ? "numa_add_cpu" : "numa_remove_cpu",
236 cpu, node, buf);
237 return mask;
238}
239
240# ifndef CONFIG_NUMA_EMU
241static void __cpuinit numa_set_cpumask(int cpu, int enable)
242{
243 struct cpumask *mask;
244
245 mask = debug_cpumask_set_cpu(cpu, enable);
246 if (!mask)
247 return;
248
249 if (enable)
250 cpumask_set_cpu(cpu, mask);
251 else
252 cpumask_clear_cpu(cpu, mask);
253}
254
255void __cpuinit numa_add_cpu(int cpu)
256{
257 numa_set_cpumask(cpu, 1);
258}
259
260void __cpuinit numa_remove_cpu(int cpu)
261{
262 numa_set_cpumask(cpu, 0);
263}
264# endif /* !CONFIG_NUMA_EMU */
265
61/* 266/*
62 * Returns a pointer to the bitmask of CPUs on Node 'node'. 267 * Returns a pointer to the bitmask of CPUs on Node 'node'.
63 */ 268 */
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node)
80 return node_to_cpumask_map[node]; 285 return node_to_cpumask_map[node];
81} 286}
82EXPORT_SYMBOL(cpumask_of_node); 287EXPORT_SYMBOL(cpumask_of_node);
83#endif 288
289#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c9f277..bde3906420df 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
110 110
111static unsigned long kva_start_pfn; 111static unsigned long kva_start_pfn;
112static unsigned long kva_pages; 112static unsigned long kva_pages;
113
114int __cpuinit numa_cpu_node(int cpu)
115{
116 return apic->x86_32_numa_cpu_node(cpu);
117}
118
113/* 119/*
114 * FLAT - support for basic PC memory model with discontig enabled, essentially 120 * FLAT - support for basic PC memory model with discontig enabled, essentially
115 * a single node with all available processors in it with a flat 121 * a single node with all available processors in it with a flat
@@ -346,8 +352,7 @@ static void init_remap_allocator(int nid)
346 (ulong) node_remap_end_vaddr[nid]); 352 (ulong) node_remap_end_vaddr[nid]);
347} 353}
348 354
349void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 355void __init initmem_init(void)
350 int acpi, int k8)
351{ 356{
352 int nid; 357 int nid;
353 long kva_target_pfn; 358 long kva_target_pfn;
@@ -361,6 +366,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
361 */ 366 */
362 367
363 get_memcfg_numa(); 368 get_memcfg_numa();
369 numa_init_array();
364 370
365 kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); 371 kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
366 372
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eebc..9ec0f209a6a4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -13,31 +13,30 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/nodemask.h> 14#include <linux/nodemask.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/acpi.h>
16 17
17#include <asm/e820.h> 18#include <asm/e820.h>
18#include <asm/proto.h> 19#include <asm/proto.h>
19#include <asm/dma.h> 20#include <asm/dma.h>
20#include <asm/numa.h>
21#include <asm/acpi.h> 21#include <asm/acpi.h>
22#include <asm/amd_nb.h> 22#include <asm/amd_nb.h>
23 23
24#include "numa_internal.h"
25
24struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 26struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
25EXPORT_SYMBOL(node_data); 27EXPORT_SYMBOL(node_data);
26 28
27struct memnode memnode; 29nodemask_t numa_nodes_parsed __initdata;
28 30
29s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { 31struct memnode memnode;
30 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
31};
32 32
33static unsigned long __initdata nodemap_addr; 33static unsigned long __initdata nodemap_addr;
34static unsigned long __initdata nodemap_size; 34static unsigned long __initdata nodemap_size;
35 35
36/* 36static struct numa_meminfo numa_meminfo __initdata;
37 * Map cpu index to node index 37
38 */ 38static int numa_distance_cnt;
39DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 39static u8 *numa_distance;
40EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
41 40
42/* 41/*
43 * Given a shift value, try to populate memnodemap[] 42 * Given a shift value, try to populate memnodemap[]
@@ -46,16 +45,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
46 * 0 if memnodmap[] too small (of shift too small) 45 * 0 if memnodmap[] too small (of shift too small)
47 * -1 if node overlap or lost ram (shift too big) 46 * -1 if node overlap or lost ram (shift too big)
48 */ 47 */
49static int __init populate_memnodemap(const struct bootnode *nodes, 48static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
50 int numnodes, int shift, int *nodeids)
51{ 49{
52 unsigned long addr, end; 50 unsigned long addr, end;
53 int i, res = -1; 51 int i, res = -1;
54 52
55 memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); 53 memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
56 for (i = 0; i < numnodes; i++) { 54 for (i = 0; i < mi->nr_blks; i++) {
57 addr = nodes[i].start; 55 addr = mi->blk[i].start;
58 end = nodes[i].end; 56 end = mi->blk[i].end;
59 if (addr >= end) 57 if (addr >= end)
60 continue; 58 continue;
61 if ((end >> shift) >= memnodemapsize) 59 if ((end >> shift) >= memnodemapsize)
@@ -63,12 +61,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
63 do { 61 do {
64 if (memnodemap[addr >> shift] != NUMA_NO_NODE) 62 if (memnodemap[addr >> shift] != NUMA_NO_NODE)
65 return -1; 63 return -1;
66 64 memnodemap[addr >> shift] = mi->blk[i].nid;
67 if (!nodeids)
68 memnodemap[addr >> shift] = i;
69 else
70 memnodemap[addr >> shift] = nodeids[i];
71
72 addr += (1UL << shift); 65 addr += (1UL << shift);
73 } while (addr < end); 66 } while (addr < end);
74 res = 1; 67 res = 1;
@@ -86,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void)
86 79
87 addr = 0x8000; 80 addr = 0x8000;
88 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); 81 nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
89 nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT, 82 nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
90 nodemap_size, L1_CACHE_BYTES); 83 nodemap_size, L1_CACHE_BYTES);
91 if (nodemap_addr == MEMBLOCK_ERROR) { 84 if (nodemap_addr == MEMBLOCK_ERROR) {
92 printk(KERN_ERR 85 printk(KERN_ERR
@@ -106,16 +99,15 @@ static int __init allocate_cachealigned_memnodemap(void)
106 * The LSB of all start and end addresses in the node map is the value of the 99 * The LSB of all start and end addresses in the node map is the value of the
107 * maximum possible shift. 100 * maximum possible shift.
108 */ 101 */
109static int __init extract_lsb_from_nodes(const struct bootnode *nodes, 102static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
110 int numnodes)
111{ 103{
112 int i, nodes_used = 0; 104 int i, nodes_used = 0;
113 unsigned long start, end; 105 unsigned long start, end;
114 unsigned long bitfield = 0, memtop = 0; 106 unsigned long bitfield = 0, memtop = 0;
115 107
116 for (i = 0; i < numnodes; i++) { 108 for (i = 0; i < mi->nr_blks; i++) {
117 start = nodes[i].start; 109 start = mi->blk[i].start;
118 end = nodes[i].end; 110 end = mi->blk[i].end;
119 if (start >= end) 111 if (start >= end)
120 continue; 112 continue;
121 bitfield |= start; 113 bitfield |= start;
@@ -131,18 +123,17 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
131 return i; 123 return i;
132} 124}
133 125
134int __init compute_hash_shift(struct bootnode *nodes, int numnodes, 126static int __init compute_hash_shift(const struct numa_meminfo *mi)
135 int *nodeids)
136{ 127{
137 int shift; 128 int shift;
138 129
139 shift = extract_lsb_from_nodes(nodes, numnodes); 130 shift = extract_lsb_from_nodes(mi);
140 if (allocate_cachealigned_memnodemap()) 131 if (allocate_cachealigned_memnodemap())
141 return -1; 132 return -1;
142 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", 133 printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
143 shift); 134 shift);
144 135
145 if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { 136 if (populate_memnodemap(mi, shift) != 1) {
146 printk(KERN_INFO "Your memory is not aligned you need to " 137 printk(KERN_INFO "Your memory is not aligned you need to "
147 "rebuild your kernel with a bigger NODEMAPSIZE " 138 "rebuild your kernel with a bigger NODEMAPSIZE "
148 "shift=%d\n", shift); 139 "shift=%d\n", shift);
@@ -188,6 +179,63 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
188 return NULL; 179 return NULL;
189} 180}
190 181
182static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
183 struct numa_meminfo *mi)
184{
185 /* ignore zero length blks */
186 if (start == end)
187 return 0;
188
189 /* whine about and ignore invalid blks */
190 if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
191 pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
192 nid, start, end);
193 return 0;
194 }
195
196 if (mi->nr_blks >= NR_NODE_MEMBLKS) {
197 pr_err("NUMA: too many memblk ranges\n");
198 return -EINVAL;
199 }
200
201 mi->blk[mi->nr_blks].start = start;
202 mi->blk[mi->nr_blks].end = end;
203 mi->blk[mi->nr_blks].nid = nid;
204 mi->nr_blks++;
205 return 0;
206}
207
208/**
209 * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
210 * @idx: Index of memblk to remove
211 * @mi: numa_meminfo to remove memblk from
212 *
213 * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
214 * decrementing @mi->nr_blks.
215 */
216void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
217{
218 mi->nr_blks--;
219 memmove(&mi->blk[idx], &mi->blk[idx + 1],
220 (mi->nr_blks - idx) * sizeof(mi->blk[0]));
221}
222
223/**
224 * numa_add_memblk - Add one numa_memblk to numa_meminfo
225 * @nid: NUMA node ID of the new memblk
226 * @start: Start address of the new memblk
227 * @end: End address of the new memblk
228 *
229 * Add a new memblk to the default numa_meminfo.
230 *
231 * RETURNS:
232 * 0 on success, -errno on failure.
233 */
234int __init numa_add_memblk(int nid, u64 start, u64 end)
235{
236 return numa_add_memblk_to(nid, start, end, &numa_meminfo);
237}
238
191/* Initialize bootmem allocator for a node */ 239/* Initialize bootmem allocator for a node */
192void __init 240void __init
193setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) 241setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
@@ -234,696 +282,386 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
234 node_set_online(nodeid); 282 node_set_online(nodeid);
235} 283}
236 284
237/* 285/**
238 * There are unfortunately some poorly designed mainboards around that 286 * numa_cleanup_meminfo - Cleanup a numa_meminfo
239 * only connect memory to a single CPU. This breaks the 1:1 cpu->node 287 * @mi: numa_meminfo to clean up
240 * mapping. To avoid this fill in the mapping for all possible CPUs, 288 *
241 * as the number of CPUs is not known yet. We round robin the existing 289 * Sanitize @mi by merging and removing unncessary memblks. Also check for
242 * nodes. 290 * conflicts and clear unused memblks.
291 *
292 * RETURNS:
293 * 0 on success, -errno on failure.
243 */ 294 */
244void __init numa_init_array(void) 295int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
245{ 296{
246 int rr, i; 297 const u64 low = 0;
298 const u64 high = (u64)max_pfn << PAGE_SHIFT;
299 int i, j, k;
247 300
248 rr = first_node(node_online_map); 301 for (i = 0; i < mi->nr_blks; i++) {
249 for (i = 0; i < nr_cpu_ids; i++) { 302 struct numa_memblk *bi = &mi->blk[i];
250 if (early_cpu_to_node(i) != NUMA_NO_NODE)
251 continue;
252 numa_set_node(i, rr);
253 rr = next_node(rr, node_online_map);
254 if (rr == MAX_NUMNODES)
255 rr = first_node(node_online_map);
256 }
257}
258
259#ifdef CONFIG_NUMA_EMU
260/* Numa emulation */
261static struct bootnode nodes[MAX_NUMNODES] __initdata;
262static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
263static char *cmdline __initdata;
264 303
265void __init numa_emu_cmdline(char *str) 304 /* make sure all blocks are inside the limits */
266{ 305 bi->start = max(bi->start, low);
267 cmdline = str; 306 bi->end = min(bi->end, high);
268}
269 307
270static int __init setup_physnodes(unsigned long start, unsigned long end, 308 /* and there's no empty block */
271 int acpi, int amd) 309 if (bi->start == bi->end) {
272{ 310 numa_remove_memblk_from(i--, mi);
273 int ret = 0;
274 int i;
275
276 memset(physnodes, 0, sizeof(physnodes));
277#ifdef CONFIG_ACPI_NUMA
278 if (acpi)
279 acpi_get_nodes(physnodes, start, end);
280#endif
281#ifdef CONFIG_AMD_NUMA
282 if (amd)
283 amd_get_nodes(physnodes);
284#endif
285 /*
286 * Basic sanity checking on the physical node map: there may be errors
287 * if the SRAT or AMD code incorrectly reported the topology or the mem=
288 * kernel parameter is used.
289 */
290 for (i = 0; i < MAX_NUMNODES; i++) {
291 if (physnodes[i].start == physnodes[i].end)
292 continue;
293 if (physnodes[i].start > end) {
294 physnodes[i].end = physnodes[i].start;
295 continue;
296 }
297 if (physnodes[i].end < start) {
298 physnodes[i].start = physnodes[i].end;
299 continue; 311 continue;
300 } 312 }
301 if (physnodes[i].start < start)
302 physnodes[i].start = start;
303 if (physnodes[i].end > end)
304 physnodes[i].end = end;
305 ret++;
306 }
307
308 /*
309 * If no physical topology was detected, a single node is faked to cover
310 * the entire address space.
311 */
312 if (!ret) {
313 physnodes[ret].start = start;
314 physnodes[ret].end = end;
315 ret = 1;
316 }
317 return ret;
318}
319
320static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
321{
322 int i;
323
324 BUG_ON(acpi && amd);
325#ifdef CONFIG_ACPI_NUMA
326 if (acpi)
327 acpi_fake_nodes(nodes, nr_nodes);
328#endif
329#ifdef CONFIG_AMD_NUMA
330 if (amd)
331 amd_fake_nodes(nodes, nr_nodes);
332#endif
333 if (!acpi && !amd)
334 for (i = 0; i < nr_cpu_ids; i++)
335 numa_set_node(i, 0);
336}
337
338/*
339 * Setups up nid to range from addr to addr + size. If the end
340 * boundary is greater than max_addr, then max_addr is used instead.
341 * The return value is 0 if there is additional memory left for
342 * allocation past addr and -1 otherwise. addr is adjusted to be at
343 * the end of the node.
344 */
345static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
346{
347 int ret = 0;
348 nodes[nid].start = *addr;
349 *addr += size;
350 if (*addr >= max_addr) {
351 *addr = max_addr;
352 ret = -1;
353 }
354 nodes[nid].end = *addr;
355 node_set(nid, node_possible_map);
356 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
357 nodes[nid].start, nodes[nid].end,
358 (nodes[nid].end - nodes[nid].start) >> 20);
359 return ret;
360}
361
362/*
363 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
364 * to max_addr. The return value is the number of nodes allocated.
365 */
366static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
367{
368 nodemask_t physnode_mask = NODE_MASK_NONE;
369 u64 size;
370 int big;
371 int ret = 0;
372 int i;
373
374 if (nr_nodes <= 0)
375 return -1;
376 if (nr_nodes > MAX_NUMNODES) {
377 pr_info("numa=fake=%d too large, reducing to %d\n",
378 nr_nodes, MAX_NUMNODES);
379 nr_nodes = MAX_NUMNODES;
380 }
381
382 size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
383 /*
384 * Calculate the number of big nodes that can be allocated as a result
385 * of consolidating the remainder.
386 */
387 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
388 FAKE_NODE_MIN_SIZE;
389
390 size &= FAKE_NODE_MIN_HASH_MASK;
391 if (!size) {
392 pr_err("Not enough memory for each node. "
393 "NUMA emulation disabled.\n");
394 return -1;
395 }
396 313
397 for (i = 0; i < MAX_NUMNODES; i++) 314 for (j = i + 1; j < mi->nr_blks; j++) {
398 if (physnodes[i].start != physnodes[i].end) 315 struct numa_memblk *bj = &mi->blk[j];
399 node_set(i, physnode_mask); 316 unsigned long start, end;
400
401 /*
402 * Continue to fill physical nodes with fake nodes until there is no
403 * memory left on any of them.
404 */
405 while (nodes_weight(physnode_mask)) {
406 for_each_node_mask(i, physnode_mask) {
407 u64 end = physnodes[i].start + size;
408 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
409
410 if (ret < big)
411 end += FAKE_NODE_MIN_SIZE;
412 317
413 /* 318 /*
414 * Continue to add memory to this fake node if its 319 * See whether there are overlapping blocks. Whine
415 * non-reserved memory is less than the per-node size. 320 * about but allow overlaps of the same nid. They
321 * will be merged below.
416 */ 322 */
417 while (end - physnodes[i].start - 323 if (bi->end > bj->start && bi->start < bj->end) {
418 memblock_x86_hole_size(physnodes[i].start, end) < size) { 324 if (bi->nid != bj->nid) {
419 end += FAKE_NODE_MIN_SIZE; 325 pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
420 if (end > physnodes[i].end) { 326 bi->nid, bi->start, bi->end,
421 end = physnodes[i].end; 327 bj->nid, bj->start, bj->end);
422 break; 328 return -EINVAL;
423 } 329 }
330 pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
331 bi->nid, bi->start, bi->end,
332 bj->start, bj->end);
424 } 333 }
425 334
426 /* 335 /*
427 * If there won't be at least FAKE_NODE_MIN_SIZE of 336 * Join together blocks on the same node, holes
428 * non-reserved memory in ZONE_DMA32 for the next node, 337 * between which don't overlap with memory on other
429 * this one must extend to the boundary. 338 * nodes.
430 */ 339 */
431 if (end < dma32_end && dma32_end - end - 340 if (bi->nid != bj->nid)
432 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) 341 continue;
433 end = dma32_end; 342 start = max(min(bi->start, bj->start), low);
434 343 end = min(max(bi->end, bj->end), high);
435 /* 344 for (k = 0; k < mi->nr_blks; k++) {
436 * If there won't be enough non-reserved memory for the 345 struct numa_memblk *bk = &mi->blk[k];
437 * next node, this one must extend to the end of the 346
438 * physical node. 347 if (bi->nid == bk->nid)
439 */ 348 continue;
440 if (physnodes[i].end - end - 349 if (start < bk->end && end > bk->start)
441 memblock_x86_hole_size(end, physnodes[i].end) < size) 350 break;
442 end = physnodes[i].end; 351 }
443 352 if (k < mi->nr_blks)
444 /* 353 continue;
445 * Avoid allocating more nodes than requested, which can 354 printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
446 * happen as a result of rounding down each node's size 355 bi->nid, bi->start, bi->end, bj->start, bj->end,
447 * to FAKE_NODE_MIN_SIZE. 356 start, end);
448 */ 357 bi->start = start;
449 if (nodes_weight(physnode_mask) + ret >= nr_nodes) 358 bi->end = end;
450 end = physnodes[i].end; 359 numa_remove_memblk_from(j--, mi);
451
452 if (setup_node_range(ret++, &physnodes[i].start,
453 end - physnodes[i].start,
454 physnodes[i].end) < 0)
455 node_clear(i, physnode_mask);
456 } 360 }
457 } 361 }
458 return ret;
459}
460 362
461/* 363 for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
462 * Returns the end address of a node so that there is at least `size' amount of 364 mi->blk[i].start = mi->blk[i].end = 0;
463 * non-reserved memory or `max_addr' is reached. 365 mi->blk[i].nid = NUMA_NO_NODE;
464 */
465static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
466{
467 u64 end = start + size;
468
469 while (end - start - memblock_x86_hole_size(start, end) < size) {
470 end += FAKE_NODE_MIN_SIZE;
471 if (end > max_addr) {
472 end = max_addr;
473 break;
474 }
475 } 366 }
476 return end; 367
368 return 0;
477} 369}
478 370
479/* 371/*
480 * Sets up fake nodes of `size' interleaved over physical nodes ranging from 372 * Set nodes, which have memory in @mi, in *@nodemask.
481 * `addr' to `max_addr'. The return value is the number of nodes allocated.
482 */ 373 */
483static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) 374static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
375 const struct numa_meminfo *mi)
484{ 376{
485 nodemask_t physnode_mask = NODE_MASK_NONE;
486 u64 min_size;
487 int ret = 0;
488 int i; 377 int i;
489 378
490 if (!size) 379 for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
491 return -1; 380 if (mi->blk[i].start != mi->blk[i].end &&
492 /* 381 mi->blk[i].nid != NUMA_NO_NODE)
493 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is 382 node_set(mi->blk[i].nid, *nodemask);
494 * increased accordingly if the requested size is too small. This 383}
495 * creates a uniform distribution of node sizes across the entire
496 * machine (but not necessarily over physical nodes).
497 */
498 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
499 MAX_NUMNODES;
500 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
501 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
502 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
503 FAKE_NODE_MIN_HASH_MASK;
504 if (size < min_size) {
505 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
506 size >> 20, min_size >> 20);
507 size = min_size;
508 }
509 size &= FAKE_NODE_MIN_HASH_MASK;
510
511 for (i = 0; i < MAX_NUMNODES; i++)
512 if (physnodes[i].start != physnodes[i].end)
513 node_set(i, physnode_mask);
514 /*
515 * Fill physical nodes with fake nodes of size until there is no memory
516 * left on any of them.
517 */
518 while (nodes_weight(physnode_mask)) {
519 for_each_node_mask(i, physnode_mask) {
520 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
521 u64 end;
522
523 end = find_end_of_node(physnodes[i].start,
524 physnodes[i].end, size);
525 /*
526 * If there won't be at least FAKE_NODE_MIN_SIZE of
527 * non-reserved memory in ZONE_DMA32 for the next node,
528 * this one must extend to the boundary.
529 */
530 if (end < dma32_end && dma32_end - end -
531 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
532 end = dma32_end;
533 384
534 /* 385/**
535 * If there won't be enough non-reserved memory for the 386 * numa_reset_distance - Reset NUMA distance table
536 * next node, this one must extend to the end of the 387 *
537 * physical node. 388 * The current table is freed. The next numa_set_distance() call will
538 */ 389 * create a new one.
539 if (physnodes[i].end - end - 390 */
540 memblock_x86_hole_size(end, physnodes[i].end) < size) 391void __init numa_reset_distance(void)
541 end = physnodes[i].end; 392{
393 size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
542 394
543 /* 395 /* numa_distance could be 1LU marking allocation failure, test cnt */
544 * Setup the fake node that will be allocated as bootmem 396 if (numa_distance_cnt)
545 * later. If setup_node_range() returns non-zero, there 397 memblock_x86_free_range(__pa(numa_distance),
546 * is no more memory available on this physical node. 398 __pa(numa_distance) + size);
547 */ 399 numa_distance_cnt = 0;
548 if (setup_node_range(ret++, &physnodes[i].start, 400 numa_distance = NULL; /* enable table creation */
549 end - physnodes[i].start,
550 physnodes[i].end) < 0)
551 node_clear(i, physnode_mask);
552 }
553 }
554 return ret;
555} 401}
556 402
557/* 403static int __init numa_alloc_distance(void)
558 * Sets up the system RAM area from start_pfn to last_pfn according to the
559 * numa=fake command-line option.
560 */
561static int __init numa_emulation(unsigned long start_pfn,
562 unsigned long last_pfn, int acpi, int amd)
563{ 404{
564 u64 addr = start_pfn << PAGE_SHIFT; 405 nodemask_t nodes_parsed;
565 u64 max_addr = last_pfn << PAGE_SHIFT; 406 size_t size;
566 int num_nodes; 407 int i, j, cnt = 0;
567 int i; 408 u64 phys;
568 409
569 /* 410 /* size the new table and allocate it */
570 * If the numa=fake command-line contains a 'M' or 'G', it represents 411 nodes_parsed = numa_nodes_parsed;
571 * the fixed node size. Otherwise, if it is just a single number N, 412 numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
572 * split the system RAM into N fake nodes.
573 */
574 if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
575 u64 size;
576 413
577 size = memparse(cmdline, &cmdline); 414 for_each_node_mask(i, nodes_parsed)
578 num_nodes = split_nodes_size_interleave(addr, max_addr, size); 415 cnt = i;
579 } else { 416 cnt++;
580 unsigned long n; 417 size = cnt * cnt * sizeof(numa_distance[0]);
581 418
582 n = simple_strtoul(cmdline, NULL, 0); 419 phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
583 num_nodes = split_nodes_interleave(addr, max_addr, n); 420 size, PAGE_SIZE);
421 if (phys == MEMBLOCK_ERROR) {
422 pr_warning("NUMA: Warning: can't allocate distance table!\n");
423 /* don't retry until explicitly reset */
424 numa_distance = (void *)1LU;
425 return -ENOMEM;
584 } 426 }
427 memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
585 428
586 if (num_nodes < 0) 429 numa_distance = __va(phys);
587 return num_nodes; 430 numa_distance_cnt = cnt;
588 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); 431
589 if (memnode_shift < 0) { 432 /* fill with the default distances */
590 memnode_shift = 0; 433 for (i = 0; i < cnt; i++)
591 printk(KERN_ERR "No NUMA hash function found. NUMA emulation " 434 for (j = 0; j < cnt; j++)
592 "disabled.\n"); 435 numa_distance[i * cnt + j] = i == j ?
593 return -1; 436 LOCAL_DISTANCE : REMOTE_DISTANCE;
594 } 437 printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
595 438
596 /*
597 * We need to vacate all active ranges that may have been registered for
598 * the e820 memory map.
599 */
600 remove_all_active_ranges();
601 for_each_node_mask(i, node_possible_map) {
602 memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
603 nodes[i].end >> PAGE_SHIFT);
604 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
605 }
606 setup_physnodes(addr, max_addr, acpi, amd);
607 fake_physnodes(acpi, amd, num_nodes);
608 numa_init_array();
609 return 0; 439 return 0;
610} 440}
611#endif /* CONFIG_NUMA_EMU */
612 441
613void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, 442/**
614 int acpi, int amd) 443 * numa_set_distance - Set NUMA distance from one NUMA to another
444 * @from: the 'from' node to set distance
445 * @to: the 'to' node to set distance
446 * @distance: NUMA distance
447 *
448 * Set the distance from node @from to @to to @distance. If distance table
449 * doesn't exist, one which is large enough to accomodate all the currently
450 * known nodes will be created.
451 *
452 * If such table cannot be allocated, a warning is printed and further
453 * calls are ignored until the distance table is reset with
454 * numa_reset_distance().
455 *
456 * If @from or @to is higher than the highest known node at the time of
457 * table creation or @distance doesn't make sense, the call is ignored.
458 * This is to allow simplification of specific NUMA config implementations.
459 */
460void __init numa_set_distance(int from, int to, int distance)
615{ 461{
616 int i; 462 if (!numa_distance && numa_alloc_distance() < 0)
617
618 nodes_clear(node_possible_map);
619 nodes_clear(node_online_map);
620
621#ifdef CONFIG_NUMA_EMU
622 setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
623 acpi, amd);
624 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
625 return; 463 return;
626 setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
627 acpi, amd);
628 nodes_clear(node_possible_map);
629 nodes_clear(node_online_map);
630#endif
631 464
632#ifdef CONFIG_ACPI_NUMA 465 if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
633 if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, 466 printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
634 last_pfn << PAGE_SHIFT)) 467 from, to, distance);
635 return; 468 return;
636 nodes_clear(node_possible_map); 469 }
637 nodes_clear(node_online_map);
638#endif
639 470
640#ifdef CONFIG_AMD_NUMA 471 if ((u8)distance != distance ||
641 if (!numa_off && amd && !amd_scan_nodes()) 472 (from == to && distance != LOCAL_DISTANCE)) {
473 pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
474 from, to, distance);
642 return; 475 return;
643 nodes_clear(node_possible_map); 476 }
644 nodes_clear(node_online_map);
645#endif
646 printk(KERN_INFO "%s\n",
647 numa_off ? "NUMA turned off" : "No NUMA configuration found");
648 477
649 printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 478 numa_distance[from * numa_distance_cnt + to] = distance;
650 start_pfn << PAGE_SHIFT,
651 last_pfn << PAGE_SHIFT);
652 /* setup dummy node covering all memory */
653 memnode_shift = 63;
654 memnodemap = memnode.embedded_map;
655 memnodemap[0] = 0;
656 node_set_online(0);
657 node_set(0, node_possible_map);
658 for (i = 0; i < nr_cpu_ids; i++)
659 numa_set_node(i, 0);
660 memblock_x86_register_active_regions(0, start_pfn, last_pfn);
661 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
662} 479}
663 480
664unsigned long __init numa_free_all_bootmem(void) 481int __node_distance(int from, int to)
665{ 482{
666 unsigned long pages = 0; 483 if (from >= numa_distance_cnt || to >= numa_distance_cnt)
667 int i; 484 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
485 return numa_distance[from * numa_distance_cnt + to];
486}
487EXPORT_SYMBOL(__node_distance);
668 488
669 for_each_online_node(i) 489/*
670 pages += free_all_bootmem_node(NODE_DATA(i)); 490 * Sanity check to catch more bad NUMA configurations (they are amazingly
491 * common). Make sure the nodes cover all memory.
492 */
493static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
494{
495 unsigned long numaram, e820ram;
496 int i;
671 497
672 pages += free_all_memory_core_early(MAX_NUMNODES); 498 numaram = 0;
499 for (i = 0; i < mi->nr_blks; i++) {
500 unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
501 unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
502 numaram += e - s;
503 numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
504 if ((long)numaram < 0)
505 numaram = 0;
506 }
673 507
674 return pages; 508 e820ram = max_pfn - (memblock_x86_hole_size(0,
509 max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
510 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
511 if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
512 printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
513 (numaram << PAGE_SHIFT) >> 20,
514 (e820ram << PAGE_SHIFT) >> 20);
515 return false;
516 }
517 return true;
675} 518}
676 519
677#ifdef CONFIG_NUMA 520static int __init numa_register_memblks(struct numa_meminfo *mi)
678
679static __init int find_near_online_node(int node)
680{ 521{
681 int n, val; 522 int i, nid;
682 int min_val = INT_MAX;
683 int best_node = -1;
684 523
685 for_each_online_node(n) { 524 /* Account for nodes with cpus and no memory */
686 val = node_distance(node, n); 525 node_possible_map = numa_nodes_parsed;
526 numa_nodemask_from_meminfo(&node_possible_map, mi);
527 if (WARN_ON(nodes_empty(node_possible_map)))
528 return -EINVAL;
529
530 memnode_shift = compute_hash_shift(mi);
531 if (memnode_shift < 0) {
532 printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
533 return -EINVAL;
534 }
687 535
688 if (val < min_val) { 536 for (i = 0; i < mi->nr_blks; i++)
689 min_val = val; 537 memblock_x86_register_active_regions(mi->blk[i].nid,
690 best_node = n; 538 mi->blk[i].start >> PAGE_SHIFT,
539 mi->blk[i].end >> PAGE_SHIFT);
540
541 /* for out of order entries */
542 sort_node_map();
543 if (!numa_meminfo_cover_memory(mi))
544 return -EINVAL;
545
546 /* Finally register nodes. */
547 for_each_node_mask(nid, node_possible_map) {
548 u64 start = (u64)max_pfn << PAGE_SHIFT;
549 u64 end = 0;
550
551 for (i = 0; i < mi->nr_blks; i++) {
552 if (nid != mi->blk[i].nid)
553 continue;
554 start = min(mi->blk[i].start, start);
555 end = max(mi->blk[i].end, end);
691 } 556 }
557
558 if (start < end)
559 setup_node_bootmem(nid, start, end);
692 } 560 }
693 561
694 return best_node; 562 return 0;
695} 563}
696 564
697/* 565/**
698 * Setup early cpu_to_node. 566 * dummy_numma_init - Fallback dummy NUMA init
699 * 567 *
700 * Populate cpu_to_node[] only if x86_cpu_to_apicid[], 568 * Used if there's no underlying NUMA architecture, NUMA initialization
701 * and apicid_to_node[] tables have valid entries for a CPU. 569 * fails, or NUMA is disabled on the command line.
702 * This means we skip cpu_to_node[] initialisation for NUMA
703 * emulation and faking node case (when running a kernel compiled
704 * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
705 * is already initialized in a round robin manner at numa_init_array,
706 * prior to this call, and this initialization is good enough
707 * for the fake NUMA cases.
708 * 570 *
709 * Called before the per_cpu areas are setup. 571 * Must online at least one node and add memory blocks that cover all
572 * allowed memory. This function must not fail.
710 */ 573 */
711void __init init_cpu_to_node(void) 574static int __init dummy_numa_init(void)
712{ 575{
713 int cpu; 576 printk(KERN_INFO "%s\n",
714 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 577 numa_off ? "NUMA turned off" : "No NUMA configuration found");
715 578 printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
716 BUG_ON(cpu_to_apicid == NULL); 579 0LU, max_pfn << PAGE_SHIFT);
717 580
718 for_each_possible_cpu(cpu) { 581 node_set(0, numa_nodes_parsed);
719 int node; 582 numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
720 u16 apicid = cpu_to_apicid[cpu];
721 583
722 if (apicid == BAD_APICID) 584 return 0;
723 continue;
724 node = apicid_to_node[apicid];
725 if (node == NUMA_NO_NODE)
726 continue;
727 if (!node_online(node))
728 node = find_near_online_node(node);
729 numa_set_node(cpu, node);
730 }
731} 585}
732#endif
733 586
734 587static int __init numa_init(int (*init_func)(void))
735void __cpuinit numa_set_node(int cpu, int node)
736{ 588{
737 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 589 int i;
738 590 int ret;
739 /* early setting, no percpu area yet */
740 if (cpu_to_node_map) {
741 cpu_to_node_map[cpu] = node;
742 return;
743 }
744
745#ifdef CONFIG_DEBUG_PER_CPU_MAPS
746 if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
747 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
748 dump_stack();
749 return;
750 }
751#endif
752 per_cpu(x86_cpu_to_node_map, cpu) = node;
753 591
754 if (node != NUMA_NO_NODE) 592 for (i = 0; i < MAX_LOCAL_APIC; i++)
755 set_cpu_numa_node(cpu, node); 593 set_apicid_to_node(i, NUMA_NO_NODE);
756}
757 594
758void __cpuinit numa_clear_node(int cpu) 595 nodes_clear(numa_nodes_parsed);
759{ 596 nodes_clear(node_possible_map);
760 numa_set_node(cpu, NUMA_NO_NODE); 597 nodes_clear(node_online_map);
761} 598 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
599 remove_all_active_ranges();
600 numa_reset_distance();
762 601
763#ifndef CONFIG_DEBUG_PER_CPU_MAPS 602 ret = init_func();
603 if (ret < 0)
604 return ret;
605 ret = numa_cleanup_meminfo(&numa_meminfo);
606 if (ret < 0)
607 return ret;
764 608
765#ifndef CONFIG_NUMA_EMU 609 numa_emulation(&numa_meminfo, numa_distance_cnt);
766void __cpuinit numa_add_cpu(int cpu)
767{
768 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
769}
770 610
771void __cpuinit numa_remove_cpu(int cpu) 611 ret = numa_register_memblks(&numa_meminfo);
772{ 612 if (ret < 0)
773 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 613 return ret;
774}
775#else
776void __cpuinit numa_add_cpu(int cpu)
777{
778 unsigned long addr;
779 u16 apicid;
780 int physnid;
781 int nid = NUMA_NO_NODE;
782 614
783 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); 615 for (i = 0; i < nr_cpu_ids; i++) {
784 if (apicid != BAD_APICID) 616 int nid = early_cpu_to_node(i);
785 nid = apicid_to_node[apicid];
786 if (nid == NUMA_NO_NODE)
787 nid = early_cpu_to_node(cpu);
788 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
789
790 /*
791 * Use the starting address of the emulated node to find which physical
792 * node it is allocated on.
793 */
794 addr = node_start_pfn(nid) << PAGE_SHIFT;
795 for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
796 if (addr >= physnodes[physnid].start &&
797 addr < physnodes[physnid].end)
798 break;
799 617
800 /* 618 if (nid == NUMA_NO_NODE)
801 * Map the cpu to each emulated node that is allocated on the physical 619 continue;
802 * node of the cpu's apic id. 620 if (!node_online(nid))
803 */ 621 numa_clear_node(i);
804 for_each_online_node(nid) {
805 addr = node_start_pfn(nid) << PAGE_SHIFT;
806 if (addr >= physnodes[physnid].start &&
807 addr < physnodes[physnid].end)
808 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
809 } 622 }
623 numa_init_array();
624 return 0;
810} 625}
811 626
812void __cpuinit numa_remove_cpu(int cpu) 627void __init initmem_init(void)
813{ 628{
814 int i; 629 int ret;
815 630
816 for_each_online_node(i) 631 if (!numa_off) {
817 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); 632#ifdef CONFIG_ACPI_NUMA
818} 633 ret = numa_init(x86_acpi_numa_init);
819#endif /* !CONFIG_NUMA_EMU */ 634 if (!ret)
820 635 return;
821#else /* CONFIG_DEBUG_PER_CPU_MAPS */ 636#endif
822static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) 637#ifdef CONFIG_AMD_NUMA
823{ 638 ret = numa_init(amd_numa_init);
824 int node = early_cpu_to_node(cpu); 639 if (!ret)
825 struct cpumask *mask; 640 return;
826 char buf[64]; 641#endif
827
828 mask = node_to_cpumask_map[node];
829 if (!mask) {
830 pr_err("node_to_cpumask_map[%i] NULL\n", node);
831 dump_stack();
832 return NULL;
833 } 642 }
834 643
835 cpulist_scnprintf(buf, sizeof(buf), mask); 644 numa_init(dummy_numa_init);
836 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
837 enable ? "numa_add_cpu" : "numa_remove_cpu",
838 cpu, node, buf);
839 return mask;
840} 645}
841 646
842/* 647unsigned long __init numa_free_all_bootmem(void)
843 * --------- debug versions of the numa functions ---------
844 */
845#ifndef CONFIG_NUMA_EMU
846static void __cpuinit numa_set_cpumask(int cpu, int enable)
847{
848 struct cpumask *mask;
849
850 mask = debug_cpumask_set_cpu(cpu, enable);
851 if (!mask)
852 return;
853
854 if (enable)
855 cpumask_set_cpu(cpu, mask);
856 else
857 cpumask_clear_cpu(cpu, mask);
858}
859#else
860static void __cpuinit numa_set_cpumask(int cpu, int enable)
861{ 648{
862 int node = early_cpu_to_node(cpu); 649 unsigned long pages = 0;
863 struct cpumask *mask;
864 int i; 650 int i;
865 651
866 for_each_online_node(i) { 652 for_each_online_node(i)
867 unsigned long addr; 653 pages += free_all_bootmem_node(NODE_DATA(i));
868
869 addr = node_start_pfn(i) << PAGE_SHIFT;
870 if (addr < physnodes[node].start ||
871 addr >= physnodes[node].end)
872 continue;
873 mask = debug_cpumask_set_cpu(cpu, enable);
874 if (!mask)
875 return;
876
877 if (enable)
878 cpumask_set_cpu(cpu, mask);
879 else
880 cpumask_clear_cpu(cpu, mask);
881 }
882}
883#endif /* CONFIG_NUMA_EMU */
884 654
885void __cpuinit numa_add_cpu(int cpu) 655 pages += free_all_memory_core_early(MAX_NUMNODES);
886{
887 numa_set_cpumask(cpu, 1);
888}
889 656
890void __cpuinit numa_remove_cpu(int cpu) 657 return pages;
891{
892 numa_set_cpumask(cpu, 0);
893} 658}
894 659
895int __cpu_to_node(int cpu) 660int __cpuinit numa_cpu_node(int cpu)
896{ 661{
897 if (early_per_cpu_ptr(x86_cpu_to_node_map)) { 662 int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
898 printk(KERN_WARNING
899 "cpu_to_node(%d): usage too early!\n", cpu);
900 dump_stack();
901 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
902 }
903 return per_cpu(x86_cpu_to_node_map, cpu);
904}
905EXPORT_SYMBOL(__cpu_to_node);
906 663
907/* 664 if (apicid != BAD_APICID)
908 * Same function as cpu_to_node() but used if called before the 665 return __apicid_to_node[apicid];
909 * per_cpu areas are setup. 666 return NUMA_NO_NODE;
910 */
911int early_cpu_to_node(int cpu)
912{
913 if (early_per_cpu_ptr(x86_cpu_to_node_map))
914 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
915
916 if (!cpu_possible(cpu)) {
917 printk(KERN_WARNING
918 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
919 dump_stack();
920 return NUMA_NO_NODE;
921 }
922 return per_cpu(x86_cpu_to_node_map, cpu);
923} 667}
924
925/*
926 * --------- end of debug versions of the numa functions ---------
927 */
928
929#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
new file mode 100644
index 000000000000..ad091e4cff17
--- /dev/null
+++ b/arch/x86/mm/numa_emulation.c
@@ -0,0 +1,494 @@
1/*
2 * NUMA emulation
3 */
4#include <linux/kernel.h>
5#include <linux/errno.h>
6#include <linux/topology.h>
7#include <linux/memblock.h>
8#include <asm/dma.h>
9
10#include "numa_internal.h"
11
12static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
13static char *emu_cmdline __initdata;
14
15void __init numa_emu_cmdline(char *str)
16{
17 emu_cmdline = str;
18}
19
20static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
21{
22 int i;
23
24 for (i = 0; i < mi->nr_blks; i++)
25 if (mi->blk[i].nid == nid)
26 return i;
27 return -ENOENT;
28}
29
30/*
31 * Sets up nid to range from @start to @end. The return value is -errno if
32 * something went wrong, 0 otherwise.
33 */
34static int __init emu_setup_memblk(struct numa_meminfo *ei,
35 struct numa_meminfo *pi,
36 int nid, int phys_blk, u64 size)
37{
38 struct numa_memblk *eb = &ei->blk[ei->nr_blks];
39 struct numa_memblk *pb = &pi->blk[phys_blk];
40
41 if (ei->nr_blks >= NR_NODE_MEMBLKS) {
42 pr_err("NUMA: Too many emulated memblks, failing emulation\n");
43 return -EINVAL;
44 }
45
46 ei->nr_blks++;
47 eb->start = pb->start;
48 eb->end = pb->start + size;
49 eb->nid = nid;
50
51 if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
52 emu_nid_to_phys[nid] = pb->nid;
53
54 pb->start += size;
55 if (pb->start >= pb->end) {
56 WARN_ON_ONCE(pb->start > pb->end);
57 numa_remove_memblk_from(phys_blk, pi);
58 }
59
60 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
61 eb->start, eb->end, (eb->end - eb->start) >> 20);
62 return 0;
63}
64
65/*
66 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
67 * to max_addr. The return value is the number of nodes allocated.
68 */
69static int __init split_nodes_interleave(struct numa_meminfo *ei,
70 struct numa_meminfo *pi,
71 u64 addr, u64 max_addr, int nr_nodes)
72{
73 nodemask_t physnode_mask = NODE_MASK_NONE;
74 u64 size;
75 int big;
76 int nid = 0;
77 int i, ret;
78
79 if (nr_nodes <= 0)
80 return -1;
81 if (nr_nodes > MAX_NUMNODES) {
82 pr_info("numa=fake=%d too large, reducing to %d\n",
83 nr_nodes, MAX_NUMNODES);
84 nr_nodes = MAX_NUMNODES;
85 }
86
87 size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
88 /*
89 * Calculate the number of big nodes that can be allocated as a result
90 * of consolidating the remainder.
91 */
92 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
93 FAKE_NODE_MIN_SIZE;
94
95 size &= FAKE_NODE_MIN_HASH_MASK;
96 if (!size) {
97 pr_err("Not enough memory for each node. "
98 "NUMA emulation disabled.\n");
99 return -1;
100 }
101
102 for (i = 0; i < pi->nr_blks; i++)
103 node_set(pi->blk[i].nid, physnode_mask);
104
105 /*
106 * Continue to fill physical nodes with fake nodes until there is no
107 * memory left on any of them.
108 */
109 while (nodes_weight(physnode_mask)) {
110 for_each_node_mask(i, physnode_mask) {
111 u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
112 u64 start, limit, end;
113 int phys_blk;
114
115 phys_blk = emu_find_memblk_by_nid(i, pi);
116 if (phys_blk < 0) {
117 node_clear(i, physnode_mask);
118 continue;
119 }
120 start = pi->blk[phys_blk].start;
121 limit = pi->blk[phys_blk].end;
122 end = start + size;
123
124 if (nid < big)
125 end += FAKE_NODE_MIN_SIZE;
126
127 /*
128 * Continue to add memory to this fake node if its
129 * non-reserved memory is less than the per-node size.
130 */
131 while (end - start -
132 memblock_x86_hole_size(start, end) < size) {
133 end += FAKE_NODE_MIN_SIZE;
134 if (end > limit) {
135 end = limit;
136 break;
137 }
138 }
139
140 /*
141 * If there won't be at least FAKE_NODE_MIN_SIZE of
142 * non-reserved memory in ZONE_DMA32 for the next node,
143 * this one must extend to the boundary.
144 */
145 if (end < dma32_end && dma32_end - end -
146 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
147 end = dma32_end;
148
149 /*
150 * If there won't be enough non-reserved memory for the
151 * next node, this one must extend to the end of the
152 * physical node.
153 */
154 if (limit - end -
155 memblock_x86_hole_size(end, limit) < size)
156 end = limit;
157
158 ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
159 phys_blk,
160 min(end, limit) - start);
161 if (ret < 0)
162 return ret;
163 }
164 }
165 return 0;
166}
167
168/*
169 * Returns the end address of a node so that there is at least `size' amount of
170 * non-reserved memory or `max_addr' is reached.
171 */
172static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
173{
174 u64 end = start + size;
175
176 while (end - start - memblock_x86_hole_size(start, end) < size) {
177 end += FAKE_NODE_MIN_SIZE;
178 if (end > max_addr) {
179 end = max_addr;
180 break;
181 }
182 }
183 return end;
184}
185
186/*
187 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
188 * `addr' to `max_addr'. The return value is the number of nodes allocated.
189 */
190static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
191 struct numa_meminfo *pi,
192 u64 addr, u64 max_addr, u64 size)
193{
194 nodemask_t physnode_mask = NODE_MASK_NONE;
195 u64 min_size;
196 int nid = 0;
197 int i, ret;
198
199 if (!size)
200 return -1;
201 /*
202 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
203 * increased accordingly if the requested size is too small. This
204 * creates a uniform distribution of node sizes across the entire
205 * machine (but not necessarily over physical nodes).
206 */
207 min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
208 MAX_NUMNODES;
209 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
210 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
211 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
212 FAKE_NODE_MIN_HASH_MASK;
213 if (size < min_size) {
214 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
215 size >> 20, min_size >> 20);
216 size = min_size;
217 }
218 size &= FAKE_NODE_MIN_HASH_MASK;
219
220 for (i = 0; i < pi->nr_blks; i++)
221 node_set(pi->blk[i].nid, physnode_mask);
222
223 /*
224 * Fill physical nodes with fake nodes of size until there is no memory
225 * left on any of them.
226 */
227 while (nodes_weight(physnode_mask)) {
228 for_each_node_mask(i, physnode_mask) {
229 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
230 u64 start, limit, end;
231 int phys_blk;
232
233 phys_blk = emu_find_memblk_by_nid(i, pi);
234 if (phys_blk < 0) {
235 node_clear(i, physnode_mask);
236 continue;
237 }
238 start = pi->blk[phys_blk].start;
239 limit = pi->blk[phys_blk].end;
240
241 end = find_end_of_node(start, limit, size);
242 /*
243 * If there won't be at least FAKE_NODE_MIN_SIZE of
244 * non-reserved memory in ZONE_DMA32 for the next node,
245 * this one must extend to the boundary.
246 */
247 if (end < dma32_end && dma32_end - end -
248 memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
249 end = dma32_end;
250
251 /*
252 * If there won't be enough non-reserved memory for the
253 * next node, this one must extend to the end of the
254 * physical node.
255 */
256 if (limit - end -
257 memblock_x86_hole_size(end, limit) < size)
258 end = limit;
259
260 ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
261 phys_blk,
262 min(end, limit) - start);
263 if (ret < 0)
264 return ret;
265 }
266 }
267 return 0;
268}
269
270/**
271 * numa_emulation - Emulate NUMA nodes
272 * @numa_meminfo: NUMA configuration to massage
273 * @numa_dist_cnt: The size of the physical NUMA distance table
274 *
275 * Emulate NUMA nodes according to the numa=fake kernel parameter.
276 * @numa_meminfo contains the physical memory configuration and is modified
277 * to reflect the emulated configuration on success. @numa_dist_cnt is
278 * used to determine the size of the physical distance table.
279 *
280 * On success, the following modifications are made.
281 *
282 * - @numa_meminfo is updated to reflect the emulated nodes.
283 *
284 * - __apicid_to_node[] is updated such that APIC IDs are mapped to the
285 * emulated nodes.
286 *
287 * - NUMA distance table is rebuilt to represent distances between emulated
288 * nodes. The distances are determined considering how emulated nodes
289 * are mapped to physical nodes and match the actual distances.
290 *
291 * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical
292 * nodes. This is used by numa_add_cpu() and numa_remove_cpu().
293 *
294 * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with
295 * identity mapping and no other modification is made.
296 */
297void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
298{
299 static struct numa_meminfo ei __initdata;
300 static struct numa_meminfo pi __initdata;
301 const u64 max_addr = max_pfn << PAGE_SHIFT;
302 u8 *phys_dist = NULL;
303 size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
304 int max_emu_nid, dfl_phys_nid;
305 int i, j, ret;
306
307 if (!emu_cmdline)
308 goto no_emu;
309
310 memset(&ei, 0, sizeof(ei));
311 pi = *numa_meminfo;
312
313 for (i = 0; i < MAX_NUMNODES; i++)
314 emu_nid_to_phys[i] = NUMA_NO_NODE;
315
316 /*
317 * If the numa=fake command-line contains a 'M' or 'G', it represents
318 * the fixed node size. Otherwise, if it is just a single number N,
319 * split the system RAM into N fake nodes.
320 */
321 if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
322 u64 size;
323
324 size = memparse(emu_cmdline, &emu_cmdline);
325 ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
326 } else {
327 unsigned long n;
328
329 n = simple_strtoul(emu_cmdline, NULL, 0);
330 ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
331 }
332
333 if (ret < 0)
334 goto no_emu;
335
336 if (numa_cleanup_meminfo(&ei) < 0) {
337 pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
338 goto no_emu;
339 }
340
341 /* copy the physical distance table */
342 if (numa_dist_cnt) {
343 u64 phys;
344
345 phys = memblock_find_in_range(0,
346 (u64)max_pfn_mapped << PAGE_SHIFT,
347 phys_size, PAGE_SIZE);
348 if (phys == MEMBLOCK_ERROR) {
349 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
350 goto no_emu;
351 }
352 memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST");
353 phys_dist = __va(phys);
354
355 for (i = 0; i < numa_dist_cnt; i++)
356 for (j = 0; j < numa_dist_cnt; j++)
357 phys_dist[i * numa_dist_cnt + j] =
358 node_distance(i, j);
359 }
360
361 /*
362 * Determine the max emulated nid and the default phys nid to use
363 * for unmapped nodes.
364 */
365 max_emu_nid = 0;
366 dfl_phys_nid = NUMA_NO_NODE;
367 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
368 if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
369 max_emu_nid = i;
370 if (dfl_phys_nid == NUMA_NO_NODE)
371 dfl_phys_nid = emu_nid_to_phys[i];
372 }
373 }
374 if (dfl_phys_nid == NUMA_NO_NODE) {
375 pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n");
376 goto no_emu;
377 }
378
379 /* commit */
380 *numa_meminfo = ei;
381
382 /*
383 * Transform __apicid_to_node table to use emulated nids by
384 * reverse-mapping phys_nid. The maps should always exist but fall
385 * back to zero just in case.
386 */
387 for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
388 if (__apicid_to_node[i] == NUMA_NO_NODE)
389 continue;
390 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
391 if (__apicid_to_node[i] == emu_nid_to_phys[j])
392 break;
393 __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
394 }
395
396 /* make sure all emulated nodes are mapped to a physical node */
397 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
398 if (emu_nid_to_phys[i] == NUMA_NO_NODE)
399 emu_nid_to_phys[i] = dfl_phys_nid;
400
401 /* transform distance table */
402 numa_reset_distance();
403 for (i = 0; i < max_emu_nid + 1; i++) {
404 for (j = 0; j < max_emu_nid + 1; j++) {
405 int physi = emu_nid_to_phys[i];
406 int physj = emu_nid_to_phys[j];
407 int dist;
408
409 if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
410 dist = physi == physj ?
411 LOCAL_DISTANCE : REMOTE_DISTANCE;
412 else
413 dist = phys_dist[physi * numa_dist_cnt + physj];
414
415 numa_set_distance(i, j, dist);
416 }
417 }
418
419 /* free the copied physical distance table */
420 if (phys_dist)
421 memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size);
422 return;
423
424no_emu:
425 /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */
426 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
427 emu_nid_to_phys[i] = i;
428}
429
430#ifndef CONFIG_DEBUG_PER_CPU_MAPS
431void __cpuinit numa_add_cpu(int cpu)
432{
433 int physnid, nid;
434
435 nid = early_cpu_to_node(cpu);
436 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
437
438 physnid = emu_nid_to_phys[nid];
439
440 /*
441 * Map the cpu to each emulated node that is allocated on the physical
442 * node of the cpu's apic id.
443 */
444 for_each_online_node(nid)
445 if (emu_nid_to_phys[nid] == physnid)
446 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
447}
448
449void __cpuinit numa_remove_cpu(int cpu)
450{
451 int i;
452
453 for_each_online_node(i)
454 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
455}
456#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
457static void __cpuinit numa_set_cpumask(int cpu, int enable)
458{
459 struct cpumask *mask;
460 int nid, physnid, i;
461
462 nid = early_cpu_to_node(cpu);
463 if (nid == NUMA_NO_NODE) {
464 /* early_cpu_to_node() already emits a warning and trace */
465 return;
466 }
467
468 physnid = emu_nid_to_phys[nid];
469
470 for_each_online_node(i) {
471 if (emu_nid_to_phys[nid] != physnid)
472 continue;
473
474 mask = debug_cpumask_set_cpu(cpu, enable);
475 if (!mask)
476 return;
477
478 if (enable)
479 cpumask_set_cpu(cpu, mask);
480 else
481 cpumask_clear_cpu(cpu, mask);
482 }
483}
484
485void __cpuinit numa_add_cpu(int cpu)
486{
487 numa_set_cpumask(cpu, 1);
488}
489
490void __cpuinit numa_remove_cpu(int cpu)
491{
492 numa_set_cpumask(cpu, 0);
493}
494#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
new file mode 100644
index 000000000000..ef2d97377d7c
--- /dev/null
+++ b/arch/x86/mm/numa_internal.h
@@ -0,0 +1,31 @@
1#ifndef __X86_MM_NUMA_INTERNAL_H
2#define __X86_MM_NUMA_INTERNAL_H
3
4#include <linux/types.h>
5#include <asm/numa.h>
6
7struct numa_memblk {
8 u64 start;
9 u64 end;
10 int nid;
11};
12
13struct numa_meminfo {
14 int nr_blks;
15 struct numa_memblk blk[NR_NODE_MEMBLKS];
16};
17
18void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
19int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
20void __init numa_reset_distance(void);
21
22#ifdef CONFIG_NUMA_EMU
23void __init numa_emulation(struct numa_meminfo *numa_meminfo,
24 int numa_dist_cnt);
25#else
26static inline void numa_emulation(struct numa_meminfo *numa_meminfo,
27 int numa_dist_cnt)
28{ }
29#endif
30
31#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c81f3c..90825f2eb0f4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
57 57
58void update_page_count(int level, unsigned long pages) 58void update_page_count(int level, unsigned long pages)
59{ 59{
60 unsigned long flags;
61
62 /* Protect against CPA */ 60 /* Protect against CPA */
63 spin_lock_irqsave(&pgd_lock, flags); 61 spin_lock(&pgd_lock);
64 direct_pages_count[level] += pages; 62 direct_pages_count[level] += pages;
65 spin_unlock_irqrestore(&pgd_lock, flags); 63 spin_unlock(&pgd_lock);
66} 64}
67 65
68static void split_page_count(int level) 66static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
394try_preserve_large_page(pte_t *kpte, unsigned long address, 392try_preserve_large_page(pte_t *kpte, unsigned long address,
395 struct cpa_data *cpa) 393 struct cpa_data *cpa)
396{ 394{
397 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; 395 unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
398 pte_t new_pte, old_pte, *tmp; 396 pte_t new_pte, old_pte, *tmp;
399 pgprot_t old_prot, new_prot, req_prot; 397 pgprot_t old_prot, new_prot, req_prot;
400 int i, do_split = 1; 398 int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
403 if (cpa->force_split) 401 if (cpa->force_split)
404 return 1; 402 return 1;
405 403
406 spin_lock_irqsave(&pgd_lock, flags); 404 spin_lock(&pgd_lock);
407 /* 405 /*
408 * Check for races, another CPU might have split this page 406 * Check for races, another CPU might have split this page
409 * up already: 407 * up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
498 } 496 }
499 497
500out_unlock: 498out_unlock:
501 spin_unlock_irqrestore(&pgd_lock, flags); 499 spin_unlock(&pgd_lock);
502 500
503 return do_split; 501 return do_split;
504} 502}
505 503
506static int split_large_page(pte_t *kpte, unsigned long address) 504static int split_large_page(pte_t *kpte, unsigned long address)
507{ 505{
508 unsigned long flags, pfn, pfninc = 1; 506 unsigned long pfn, pfninc = 1;
509 unsigned int i, level; 507 unsigned int i, level;
510 pte_t *pbase, *tmp; 508 pte_t *pbase, *tmp;
511 pgprot_t ref_prot; 509 pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
519 if (!base) 517 if (!base)
520 return -ENOMEM; 518 return -ENOMEM;
521 519
522 spin_lock_irqsave(&pgd_lock, flags); 520 spin_lock(&pgd_lock);
523 /* 521 /*
524 * Check for races, another CPU might have split this page 522 * Check for races, another CPU might have split this page
525 * up for us already: 523 * up for us already:
@@ -591,7 +589,7 @@ out_unlock:
591 */ 589 */
592 if (base) 590 if (base)
593 __free_page(base); 591 __free_page(base);
594 spin_unlock_irqrestore(&pgd_lock, flags); 592 spin_unlock(&pgd_lock);
595 593
596 return 0; 594 return 0;
597} 595}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d3c96d..0113d19c8aa6 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
121 121
122static void pgd_dtor(pgd_t *pgd) 122static void pgd_dtor(pgd_t *pgd)
123{ 123{
124 unsigned long flags; /* can be called from interrupt context */
125
126 if (SHARED_KERNEL_PMD) 124 if (SHARED_KERNEL_PMD)
127 return; 125 return;
128 126
129 spin_lock_irqsave(&pgd_lock, flags); 127 spin_lock(&pgd_lock);
130 pgd_list_del(pgd); 128 pgd_list_del(pgd);
131 spin_unlock_irqrestore(&pgd_lock, flags); 129 spin_unlock(&pgd_lock);
132} 130}
133 131
134/* 132/*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
260{ 258{
261 pgd_t *pgd; 259 pgd_t *pgd;
262 pmd_t *pmds[PREALLOCATED_PMDS]; 260 pmd_t *pmds[PREALLOCATED_PMDS];
263 unsigned long flags;
264 261
265 pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); 262 pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
266 263
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
280 * respect to anything walking the pgd_list, so that they 277 * respect to anything walking the pgd_list, so that they
281 * never see a partially populated pgd. 278 * never see a partially populated pgd.
282 */ 279 */
283 spin_lock_irqsave(&pgd_lock, flags); 280 spin_lock(&pgd_lock);
284 281
285 pgd_ctor(mm, pgd); 282 pgd_ctor(mm, pgd);
286 pgd_prepopulate_pmd(mm, pgd, pmds); 283 pgd_prepopulate_pmd(mm, pgd, pmds);
287 284
288 spin_unlock_irqrestore(&pgd_lock, flags); 285 spin_unlock(&pgd_lock);
289 286
290 return pgd; 287 return pgd;
291 288
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index ae96e7b8051d..48651c6f657d 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -57,7 +57,7 @@ struct node_memory_chunk_s {
57static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; 57static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
58 58
59static int __initdata num_memory_chunks; /* total number of memory chunks */ 59static int __initdata num_memory_chunks; /* total number of memory chunks */
60static u8 __initdata apicid_to_pxm[MAX_APICID]; 60static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
61 61
62int acpi_numa __initdata; 62int acpi_numa __initdata;
63 63
@@ -254,8 +254,8 @@ int __init get_memcfg_from_srat(void)
254 printk(KERN_DEBUG "Number of memory chunks in system = %d\n", 254 printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
255 num_memory_chunks); 255 num_memory_chunks);
256 256
257 for (i = 0; i < MAX_APICID; i++) 257 for (i = 0; i < MAX_LOCAL_APIC; i++)
258 apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); 258 set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
259 259
260 for (j = 0; j < num_memory_chunks; j++){ 260 for (j = 0; j < num_memory_chunks; j++){
261 struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; 261 struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285d1daa..8e9d3394f6d4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -26,88 +26,34 @@
26 26
27int acpi_numa __initdata; 27int acpi_numa __initdata;
28 28
29static struct acpi_table_slit *acpi_slit;
30
31static nodemask_t nodes_parsed __initdata;
32static nodemask_t cpu_nodes_parsed __initdata;
33static struct bootnode nodes[MAX_NUMNODES] __initdata;
34static struct bootnode nodes_add[MAX_NUMNODES]; 29static struct bootnode nodes_add[MAX_NUMNODES];
35 30
36static int num_node_memblks __initdata;
37static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
38static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
39
40static __init int setup_node(int pxm) 31static __init int setup_node(int pxm)
41{ 32{
42 return acpi_map_pxm_to_node(pxm); 33 return acpi_map_pxm_to_node(pxm);
43} 34}
44 35
45static __init int conflicting_memblks(unsigned long start, unsigned long end)
46{
47 int i;
48 for (i = 0; i < num_node_memblks; i++) {
49 struct bootnode *nd = &node_memblk_range[i];
50 if (nd->start == nd->end)
51 continue;
52 if (nd->end > start && nd->start < end)
53 return memblk_nodeid[i];
54 if (nd->end == end && nd->start == start)
55 return memblk_nodeid[i];
56 }
57 return -1;
58}
59
60static __init void cutoff_node(int i, unsigned long start, unsigned long end)
61{
62 struct bootnode *nd = &nodes[i];
63
64 if (nd->start < start) {
65 nd->start = start;
66 if (nd->end < nd->start)
67 nd->start = nd->end;
68 }
69 if (nd->end > end) {
70 nd->end = end;
71 if (nd->start > nd->end)
72 nd->start = nd->end;
73 }
74}
75
76static __init void bad_srat(void) 36static __init void bad_srat(void)
77{ 37{
78 int i;
79 printk(KERN_ERR "SRAT: SRAT not used.\n"); 38 printk(KERN_ERR "SRAT: SRAT not used.\n");
80 acpi_numa = -1; 39 acpi_numa = -1;
81 for (i = 0; i < MAX_LOCAL_APIC; i++) 40 memset(nodes_add, 0, sizeof(nodes_add));
82 apicid_to_node[i] = NUMA_NO_NODE;
83 for (i = 0; i < MAX_NUMNODES; i++) {
84 nodes[i].start = nodes[i].end = 0;
85 nodes_add[i].start = nodes_add[i].end = 0;
86 }
87 remove_all_active_ranges();
88} 41}
89 42
90static __init inline int srat_disabled(void) 43static __init inline int srat_disabled(void)
91{ 44{
92 return numa_off || acpi_numa < 0; 45 return acpi_numa < 0;
93} 46}
94 47
95/* Callback for SLIT parsing */ 48/* Callback for SLIT parsing */
96void __init acpi_numa_slit_init(struct acpi_table_slit *slit) 49void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
97{ 50{
98 unsigned length; 51 int i, j;
99 unsigned long phys;
100
101 length = slit->header.length;
102 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
103 PAGE_SIZE);
104
105 if (phys == MEMBLOCK_ERROR)
106 panic(" Can not save slit!\n");
107 52
108 acpi_slit = __va(phys); 53 for (i = 0; i < slit->locality_count; i++)
109 memcpy(acpi_slit, slit, length); 54 for (j = 0; j < slit->locality_count; j++)
110 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT"); 55 numa_set_distance(pxm_to_node(i), pxm_to_node(j),
56 slit->entry[slit->locality_count * i + j]);
111} 57}
112 58
113/* Callback for Proximity Domain -> x2APIC mapping */ 59/* Callback for Proximity Domain -> x2APIC mapping */
@@ -138,8 +84,8 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
138 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); 84 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
139 return; 85 return;
140 } 86 }
141 apicid_to_node[apic_id] = node; 87 set_apicid_to_node(apic_id, node);
142 node_set(node, cpu_nodes_parsed); 88 node_set(node, numa_nodes_parsed);
143 acpi_numa = 1; 89 acpi_numa = 1;
144 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", 90 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
145 pxm, apic_id, node); 91 pxm, apic_id, node);
@@ -178,8 +124,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
178 return; 124 return;
179 } 125 }
180 126
181 apicid_to_node[apic_id] = node; 127 set_apicid_to_node(apic_id, node);
182 node_set(node, cpu_nodes_parsed); 128 node_set(node, numa_nodes_parsed);
183 acpi_numa = 1; 129 acpi_numa = 1;
184 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", 130 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
185 pxm, apic_id, node); 131 pxm, apic_id, node);
@@ -241,7 +187,7 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
241 } 187 }
242 188
243 if (changed) { 189 if (changed) {
244 node_set(node, cpu_nodes_parsed); 190 node_set(node, numa_nodes_parsed);
245 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", 191 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
246 nd->start, nd->end); 192 nd->start, nd->end);
247 } 193 }
@@ -251,10 +197,8 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
251void __init 197void __init
252acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) 198acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
253{ 199{
254 struct bootnode *nd, oldnode;
255 unsigned long start, end; 200 unsigned long start, end;
256 int node, pxm; 201 int node, pxm;
257 int i;
258 202
259 if (srat_disabled()) 203 if (srat_disabled())
260 return; 204 return;
@@ -276,300 +220,31 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
276 bad_srat(); 220 bad_srat();
277 return; 221 return;
278 } 222 }
279 i = conflicting_memblks(start, end); 223
280 if (i == node) { 224 if (numa_add_memblk(node, start, end) < 0) {
281 printk(KERN_WARNING
282 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
283 pxm, start, end, nodes[i].start, nodes[i].end);
284 } else if (i >= 0) {
285 printk(KERN_ERR
286 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
287 pxm, start, end, node_to_pxm(i),
288 nodes[i].start, nodes[i].end);
289 bad_srat(); 225 bad_srat();
290 return; 226 return;
291 } 227 }
292 nd = &nodes[node];
293 oldnode = *nd;
294 if (!node_test_and_set(node, nodes_parsed)) {
295 nd->start = start;
296 nd->end = end;
297 } else {
298 if (start < nd->start)
299 nd->start = start;
300 if (nd->end < end)
301 nd->end = end;
302 }
303 228
304 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, 229 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
305 start, end); 230 start, end);
306 231
307 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { 232 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
308 update_nodes_add(node, start, end); 233 update_nodes_add(node, start, end);
309 /* restore nodes[node] */
310 *nd = oldnode;
311 if ((nd->start | nd->end) == 0)
312 node_clear(node, nodes_parsed);
313 }
314
315 node_memblk_range[num_node_memblks].start = start;
316 node_memblk_range[num_node_memblks].end = end;
317 memblk_nodeid[num_node_memblks] = node;
318 num_node_memblks++;
319}
320
321/* Sanity check to catch more bad SRATs (they are amazingly common).
322 Make sure the PXMs cover all memory. */
323static int __init nodes_cover_memory(const struct bootnode *nodes)
324{
325 int i;
326 unsigned long pxmram, e820ram;
327
328 pxmram = 0;
329 for_each_node_mask(i, nodes_parsed) {
330 unsigned long s = nodes[i].start >> PAGE_SHIFT;
331 unsigned long e = nodes[i].end >> PAGE_SHIFT;
332 pxmram += e - s;
333 pxmram -= __absent_pages_in_range(i, s, e);
334 if ((long)pxmram < 0)
335 pxmram = 0;
336 }
337
338 e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
339 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
340 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
341 printk(KERN_ERR
342 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
343 (pxmram << PAGE_SHIFT) >> 20,
344 (e820ram << PAGE_SHIFT) >> 20);
345 return 0;
346 }
347 return 1;
348} 234}
349 235
350void __init acpi_numa_arch_fixup(void) {} 236void __init acpi_numa_arch_fixup(void) {}
351 237
352#ifdef CONFIG_NUMA_EMU 238int __init x86_acpi_numa_init(void)
353void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
354 unsigned long end)
355{
356 int i;
357
358 for_each_node_mask(i, nodes_parsed) {
359 cutoff_node(i, start, end);
360 physnodes[i].start = nodes[i].start;
361 physnodes[i].end = nodes[i].end;
362 }
363}
364#endif /* CONFIG_NUMA_EMU */
365
366/* Use the information discovered above to actually set up the nodes. */
367int __init acpi_scan_nodes(unsigned long start, unsigned long end)
368{ 239{
369 int i; 240 int ret;
370
371 if (acpi_numa <= 0)
372 return -1;
373
374 /* First clean up the node list */
375 for (i = 0; i < MAX_NUMNODES; i++)
376 cutoff_node(i, start, end);
377
378 /*
379 * Join together blocks on the same node, holes between
380 * which don't overlap with memory on other nodes.
381 */
382 for (i = 0; i < num_node_memblks; ++i) {
383 int j, k;
384
385 for (j = i + 1; j < num_node_memblks; ++j) {
386 unsigned long start, end;
387
388 if (memblk_nodeid[i] != memblk_nodeid[j])
389 continue;
390 start = min(node_memblk_range[i].end,
391 node_memblk_range[j].end);
392 end = max(node_memblk_range[i].start,
393 node_memblk_range[j].start);
394 for (k = 0; k < num_node_memblks; ++k) {
395 if (memblk_nodeid[i] == memblk_nodeid[k])
396 continue;
397 if (start < node_memblk_range[k].end &&
398 end > node_memblk_range[k].start)
399 break;
400 }
401 if (k < num_node_memblks)
402 continue;
403 start = min(node_memblk_range[i].start,
404 node_memblk_range[j].start);
405 end = max(node_memblk_range[i].end,
406 node_memblk_range[j].end);
407 printk(KERN_INFO "SRAT: Node %d "
408 "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
409 memblk_nodeid[i],
410 node_memblk_range[i].start,
411 node_memblk_range[i].end,
412 node_memblk_range[j].start,
413 node_memblk_range[j].end,
414 start, end);
415 node_memblk_range[i].start = start;
416 node_memblk_range[i].end = end;
417 k = --num_node_memblks - j;
418 memmove(memblk_nodeid + j, memblk_nodeid + j+1,
419 k * sizeof(*memblk_nodeid));
420 memmove(node_memblk_range + j, node_memblk_range + j+1,
421 k * sizeof(*node_memblk_range));
422 --j;
423 }
424 }
425
426 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
427 memblk_nodeid);
428 if (memnode_shift < 0) {
429 printk(KERN_ERR
430 "SRAT: No NUMA node hash function found. Contact maintainer\n");
431 bad_srat();
432 return -1;
433 }
434
435 for (i = 0; i < num_node_memblks; i++)
436 memblock_x86_register_active_regions(memblk_nodeid[i],
437 node_memblk_range[i].start >> PAGE_SHIFT,
438 node_memblk_range[i].end >> PAGE_SHIFT);
439
440 /* for out of order entries in SRAT */
441 sort_node_map();
442 if (!nodes_cover_memory(nodes)) {
443 bad_srat();
444 return -1;
445 }
446 241
447 /* Account for nodes with cpus and no memory */ 242 ret = acpi_numa_init();
448 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); 243 if (ret < 0)
449 244 return ret;
450 /* Finally register nodes */ 245 return srat_disabled() ? -EINVAL : 0;
451 for_each_node_mask(i, node_possible_map)
452 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
453 /* Try again in case setup_node_bootmem missed one due
454 to missing bootmem */
455 for_each_node_mask(i, node_possible_map)
456 if (!node_online(i))
457 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
458
459 for (i = 0; i < nr_cpu_ids; i++) {
460 int node = early_cpu_to_node(i);
461
462 if (node == NUMA_NO_NODE)
463 continue;
464 if (!node_online(node))
465 numa_clear_node(i);
466 }
467 numa_init_array();
468 return 0;
469}
470
471#ifdef CONFIG_NUMA_EMU
472static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
473 [0 ... MAX_NUMNODES-1] = PXM_INVAL
474};
475static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
476 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
477};
478static int __init find_node_by_addr(unsigned long addr)
479{
480 int ret = NUMA_NO_NODE;
481 int i;
482
483 for_each_node_mask(i, nodes_parsed) {
484 /*
485 * Find the real node that this emulated node appears on. For
486 * the sake of simplicity, we only use a real node's starting
487 * address to determine which emulated node it appears on.
488 */
489 if (addr >= nodes[i].start && addr < nodes[i].end) {
490 ret = i;
491 break;
492 }
493 }
494 return ret;
495} 246}
496 247
497/*
498 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
499 * mappings that respect the real ACPI topology but reflect our emulated
500 * environment. For each emulated node, we find which real node it appears on
501 * and create PXM to NID mappings for those fake nodes which mirror that
502 * locality. SLIT will now represent the correct distances between emulated
503 * nodes as a result of the real topology.
504 */
505void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
506{
507 int i, j;
508
509 for (i = 0; i < num_nodes; i++) {
510 int nid, pxm;
511
512 nid = find_node_by_addr(fake_nodes[i].start);
513 if (nid == NUMA_NO_NODE)
514 continue;
515 pxm = node_to_pxm(nid);
516 if (pxm == PXM_INVAL)
517 continue;
518 fake_node_to_pxm_map[i] = pxm;
519 /*
520 * For each apicid_to_node mapping that exists for this real
521 * node, it must now point to the fake node ID.
522 */
523 for (j = 0; j < MAX_LOCAL_APIC; j++)
524 if (apicid_to_node[j] == nid &&
525 fake_apicid_to_node[j] == NUMA_NO_NODE)
526 fake_apicid_to_node[j] = i;
527 }
528
529 /*
530 * If there are apicid-to-node mappings for physical nodes that do not
531 * have a corresponding emulated node, it should default to a guaranteed
532 * value.
533 */
534 for (i = 0; i < MAX_LOCAL_APIC; i++)
535 if (apicid_to_node[i] != NUMA_NO_NODE &&
536 fake_apicid_to_node[i] == NUMA_NO_NODE)
537 fake_apicid_to_node[i] = 0;
538
539 for (i = 0; i < num_nodes; i++)
540 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
541 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
542
543 nodes_clear(nodes_parsed);
544 for (i = 0; i < num_nodes; i++)
545 if (fake_nodes[i].start != fake_nodes[i].end)
546 node_set(i, nodes_parsed);
547}
548
549static int null_slit_node_compare(int a, int b)
550{
551 return node_to_pxm(a) == node_to_pxm(b);
552}
553#else
554static int null_slit_node_compare(int a, int b)
555{
556 return a == b;
557}
558#endif /* CONFIG_NUMA_EMU */
559
560int __node_distance(int a, int b)
561{
562 int index;
563
564 if (!acpi_slit)
565 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
566 REMOTE_DISTANCE;
567 index = acpi_slit->locality_count * node_to_pxm(a);
568 return acpi_slit->entry[index + node_to_pxm(b)];
569}
570
571EXPORT_SYMBOL(__node_distance);
572
573#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) 248#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
574int memory_add_physaddr_to_nid(u64 start) 249int memory_add_physaddr_to_nid(u64 start)
575{ 250{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724d5d8f..d6c0418c3e47 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -179,12 +179,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
179 sender = this_cpu_read(tlb_vector_offset); 179 sender = this_cpu_read(tlb_vector_offset);
180 f = &flush_state[sender]; 180 f = &flush_state[sender];
181 181
182 /* 182 if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
183 * Could avoid this lock when 183 raw_spin_lock(&f->tlbstate_lock);
184 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
185 * probably not worth checking this for a cache-hot lock.
186 */
187 raw_spin_lock(&f->tlbstate_lock);
188 184
189 f->flush_mm = mm; 185 f->flush_mm = mm;
190 f->flush_va = va; 186 f->flush_va = va;
@@ -202,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
202 198
203 f->flush_mm = NULL; 199 f->flush_mm = NULL;
204 f->flush_va = 0; 200 f->flush_va = 0;
205 raw_spin_unlock(&f->tlbstate_lock); 201 if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
202 raw_spin_unlock(&f->tlbstate_lock);
206} 203}
207 204
208void native_flush_tlb_others(const struct cpumask *cpumask, 205void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -211,11 +208,10 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
211 if (is_uv_system()) { 208 if (is_uv_system()) {
212 unsigned int cpu; 209 unsigned int cpu;
213 210
214 cpu = get_cpu(); 211 cpu = smp_processor_id();
215 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); 212 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
216 if (cpumask) 213 if (cpumask)
217 flush_tlb_others_ipi(cpumask, mm, va); 214 flush_tlb_others_ipi(cpumask, mm, va);
218 put_cpu();
219 return; 215 return;
220 } 216 }
221 flush_tlb_others_ipi(cpumask, mm, va); 217 flush_tlb_others_ipi(cpumask, mm, va);
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index e27dffbbb1a7..026e4931d162 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -350,7 +350,7 @@ static int __init early_fill_mp_bus_info(void)
350 350
351#define ENABLE_CF8_EXT_CFG (1ULL << 46) 351#define ENABLE_CF8_EXT_CFG (1ULL << 46)
352 352
353static void enable_pci_io_ecs(void *unused) 353static void __cpuinit enable_pci_io_ecs(void *unused)
354{ 354{
355 u64 reg; 355 u64 reg;
356 rdmsrl(MSR_AMD64_NB_CFG, reg); 356 rdmsrl(MSR_AMD64_NB_CFG, reg);
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index c63c6d3dd8e8..67858be4b52b 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -34,6 +34,7 @@
34#include <linux/pci.h> 34#include <linux/pci.h>
35#include <linux/init.h> 35#include <linux/init.h>
36 36
37#include <asm/ce4100.h>
37#include <asm/pci_x86.h> 38#include <asm/pci_x86.h>
38 39
39struct sim_reg { 40struct sim_reg {
@@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = {
306 .write = ce4100_conf_write, 307 .write = ce4100_conf_write,
307}; 308};
308 309
309static int __init ce4100_pci_init(void) 310int __init ce4100_pci_init(void)
310{ 311{
311 init_sim_regs(); 312 init_sim_regs();
312 raw_pci_ops = &ce4100_pci_conf; 313 raw_pci_ops = &ce4100_pci_conf;
313 return 0; 314 /* Indicate caller that it should invoke pci_legacy_init() */
315 return 1;
314} 316}
315subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 25cd4a07d09f..8c4085a95ef1 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -20,7 +20,8 @@
20#include <asm/xen/pci.h> 20#include <asm/xen/pci.h>
21 21
22#ifdef CONFIG_ACPI 22#ifdef CONFIG_ACPI
23static int xen_hvm_register_pirq(u32 gsi, int triggering) 23static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
24 int trigger, int polarity)
24{ 25{
25 int rc, irq; 26 int rc, irq;
26 struct physdev_map_pirq map_irq; 27 struct physdev_map_pirq map_irq;
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
41 return -1; 42 return -1;
42 } 43 }
43 44
44 if (triggering == ACPI_EDGE_SENSITIVE) { 45 if (trigger == ACPI_EDGE_SENSITIVE) {
45 shareable = 0; 46 shareable = 0;
46 name = "ioapic-edge"; 47 name = "ioapic-edge";
47 } else { 48 } else {
@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
55 56
56 return irq; 57 return irq;
57} 58}
58
59static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
60 int trigger, int polarity)
61{
62 return xen_hvm_register_pirq(gsi, trigger);
63}
64#endif 59#endif
65 60
66#if defined(CONFIG_PCI_MSI) 61#if defined(CONFIG_PCI_MSI)
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
91 86
92static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 87static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
93{ 88{
94 int irq, pirq, ret = 0; 89 int irq, pirq;
95 struct msi_desc *msidesc; 90 struct msi_desc *msidesc;
96 struct msi_msg msg; 91 struct msi_msg msg;
97 92
@@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
99 __read_msi_msg(msidesc, &msg); 94 __read_msi_msg(msidesc, &msg);
100 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | 95 pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
101 ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); 96 ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
102 if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { 97 if (msg.data != XEN_PIRQ_MSI_DATA ||
103 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? 98 xen_irq_from_pirq(pirq) < 0) {
104 "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); 99 pirq = xen_allocate_pirq_msi(dev, msidesc);
105 if (irq < 0) 100 if (pirq < 0)
106 goto error; 101 goto error;
107 ret = set_irq_msi(irq, msidesc); 102 xen_msi_compose_msg(dev, pirq, &msg);
108 if (ret < 0) 103 __write_msi_msg(msidesc, &msg);
109 goto error_while; 104 dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
110 printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" 105 } else {
111 " pirq=%d\n", irq, pirq); 106 dev_dbg(&dev->dev,
112 return 0; 107 "xen: msi already bound to pirq=%d\n", pirq);
113 } 108 }
114 xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? 109 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
115 "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); 110 (type == PCI_CAP_ID_MSIX) ?
116 if (irq < 0 || pirq < 0) 111 "msi-x" : "msi");
112 if (irq < 0)
117 goto error; 113 goto error;
118 printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); 114 dev_dbg(&dev->dev,
119 xen_msi_compose_msg(dev, pirq, &msg); 115 "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
120 ret = set_irq_msi(irq, msidesc);
121 if (ret < 0)
122 goto error_while;
123 write_msi_msg(irq, &msg);
124 } 116 }
125 return 0; 117 return 0;
126 118
127error_while:
128 unbind_from_irqhandler(irq, NULL);
129error: 119error:
130 if (ret == -ENODEV) 120 dev_err(&dev->dev,
131 dev_err(&dev->dev, "Xen PCI frontend has not registered" \ 121 "Xen PCI frontend has not registered MSI/MSI-X support!\n");
132 " MSI/MSI-X support!\n"); 122 return -ENODEV;
133
134 return ret;
135} 123}
136 124
137/* 125/*
@@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
150 return -ENOMEM; 138 return -ENOMEM;
151 139
152 if (type == PCI_CAP_ID_MSIX) 140 if (type == PCI_CAP_ID_MSIX)
153 ret = xen_pci_frontend_enable_msix(dev, &v, nvec); 141 ret = xen_pci_frontend_enable_msix(dev, v, nvec);
154 else 142 else
155 ret = xen_pci_frontend_enable_msi(dev, &v); 143 ret = xen_pci_frontend_enable_msi(dev, v);
156 if (ret) 144 if (ret)
157 goto error; 145 goto error;
158 i = 0; 146 i = 0;
159 list_for_each_entry(msidesc, &dev->msi_list, list) { 147 list_for_each_entry(msidesc, &dev->msi_list, list) {
160 irq = xen_allocate_pirq(v[i], 0, /* not sharable */ 148 irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
161 (type == PCI_CAP_ID_MSIX) ? 149 (type == PCI_CAP_ID_MSIX) ?
162 "pcifront-msi-x" : "pcifront-msi"); 150 "pcifront-msi-x" :
163 if (irq < 0) { 151 "pcifront-msi");
164 ret = -1; 152 if (irq < 0)
165 goto free; 153 goto free;
166 }
167
168 ret = set_irq_msi(irq, msidesc);
169 if (ret)
170 goto error_while;
171 i++; 154 i++;
172 } 155 }
173 kfree(v); 156 kfree(v);
174 return 0; 157 return 0;
175 158
176error_while:
177 unbind_from_irqhandler(irq, NULL);
178error: 159error:
179 if (ret == -ENODEV) 160 dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
180 dev_err(&dev->dev, "Xen PCI frontend has not registered" \
181 " MSI/MSI-X support!\n");
182free: 161free:
183 kfree(v); 162 kfree(v);
184 return ret; 163 return ret;
@@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
193 xen_pci_frontend_disable_msix(dev); 172 xen_pci_frontend_disable_msix(dev);
194 else 173 else
195 xen_pci_frontend_disable_msi(dev); 174 xen_pci_frontend_disable_msi(dev);
175
176 /* Free the IRQ's and the msidesc using the generic code. */
177 default_teardown_msi_irqs(dev);
196} 178}
197 179
198static void xen_teardown_msi_irq(unsigned int irq) 180static void xen_teardown_msi_irq(unsigned int irq)
@@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq)
200 xen_destroy_irq(irq); 182 xen_destroy_irq(irq);
201} 183}
202 184
185#ifdef CONFIG_XEN_DOM0
203static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 186static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
204{ 187{
205 int irq, ret; 188 int ret = 0;
206 struct msi_desc *msidesc; 189 struct msi_desc *msidesc;
207 190
208 list_for_each_entry(msidesc, &dev->msi_list, list) { 191 list_for_each_entry(msidesc, &dev->msi_list, list) {
209 irq = xen_create_msi_irq(dev, msidesc, type); 192 struct physdev_map_pirq map_irq;
210 if (irq < 0)
211 return -1;
212 193
213 ret = set_irq_msi(irq, msidesc); 194 memset(&map_irq, 0, sizeof(map_irq));
214 if (ret) 195 map_irq.domid = DOMID_SELF;
215 goto error; 196 map_irq.type = MAP_PIRQ_TYPE_MSI;
216 } 197 map_irq.index = -1;
217 return 0; 198 map_irq.pirq = -1;
199 map_irq.bus = dev->bus->number;
200 map_irq.devfn = dev->devfn;
218 201
219error: 202 if (type == PCI_CAP_ID_MSIX) {
220 xen_destroy_irq(irq); 203 int pos;
204 u32 table_offset, bir;
205
206 pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
207
208 pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
209 &table_offset);
210 bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
211
212 map_irq.table_base = pci_resource_start(dev, bir);
213 map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
214 }
215
216 ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
217 if (ret) {
218 dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
219 goto out;
220 }
221
222 ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
223 map_irq.pirq, map_irq.index,
224 (type == PCI_CAP_ID_MSIX) ?
225 "msi-x" : "msi");
226 if (ret < 0)
227 goto out;
228 }
229 ret = 0;
230out:
221 return ret; 231 return ret;
222} 232}
223#endif 233#endif
234#endif
224 235
225static int xen_pcifront_enable_irq(struct pci_dev *dev) 236static int xen_pcifront_enable_irq(struct pci_dev *dev)
226{ 237{
227 int rc; 238 int rc;
228 int share = 1; 239 int share = 1;
240 u8 gsi;
229 241
230 dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); 242 rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
231 243 if (rc < 0) {
232 if (dev->irq < 0) 244 dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
233 return -EINVAL; 245 rc);
246 return rc;
247 }
234 248
235 if (dev->irq < NR_IRQS_LEGACY) 249 if (gsi < NR_IRQS_LEGACY)
236 share = 0; 250 share = 0;
237 251
238 rc = xen_allocate_pirq(dev->irq, share, "pcifront"); 252 rc = xen_allocate_pirq(gsi, share, "pcifront");
239 if (rc < 0) { 253 if (rc < 0) {
240 dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", 254 dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
241 dev->irq, rc); 255 gsi, rc);
242 return rc; 256 return rc;
243 } 257 }
258
259 dev->irq = rc;
260 dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
244 return 0; 261 return 0;
245} 262}
246 263
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 68c0dbcc95be..28071bb31db7 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,6 +15,7 @@
15#include <linux/serial_reg.h> 15#include <linux/serial_reg.h>
16#include <linux/serial_8250.h> 16#include <linux/serial_8250.h>
17 17
18#include <asm/ce4100.h>
18#include <asm/prom.h> 19#include <asm/prom.h>
19#include <asm/setup.h> 20#include <asm/setup.h>
20#include <asm/i8259.h> 21#include <asm/i8259.h>
@@ -136,6 +137,7 @@ void __init x86_ce4100_early_setup(void)
136 x86_init.resources.probe_roms = x86_init_noop; 137 x86_init.resources.probe_roms = x86_init_noop;
137 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 138 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
138 x86_init.mpparse.find_smp_config = x86_init_noop; 139 x86_init.mpparse.find_smp_config = x86_init_noop;
140 x86_init.pci.init = ce4100_pci_init;
139 141
140#ifdef CONFIG_X86_IO_APIC 142#ifdef CONFIG_X86_IO_APIC
141 x86_init.pci.init_irq = sdv_pci_init; 143 x86_init.pci.init_irq = sdv_pci_init;
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index dab874647530..044bda5b3174 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -140,8 +140,7 @@ void * __init prom_early_alloc(unsigned long size)
140 * wasted bootmem) and hand off chunks of it to callers. 140 * wasted bootmem) and hand off chunks of it to callers.
141 */ 141 */
142 res = alloc_bootmem(chunk_size); 142 res = alloc_bootmem(chunk_size);
143 if (!res) 143 BUG_ON(!res);
144 return NULL;
145 prom_early_allocated += chunk_size; 144 prom_early_allocated += chunk_size;
146 memset(res, 0, chunk_size); 145 memset(res, 0, chunk_size);
147 free_mem = chunk_size; 146 free_mem = chunk_size;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index df58e9cad96a..a7b38d35c29a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode)
1364 memset(bd2, 0, sizeof(struct bau_desc)); 1364 memset(bd2, 0, sizeof(struct bau_desc));
1365 bd2->header.sw_ack_flag = 1; 1365 bd2->header.sw_ack_flag = 1;
1366 /* 1366 /*
1367 * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub 1367 * base_dest_nodeid is the nasid of the first uvhub
1368 * in the partition. The bit map will indicate uvhub numbers, 1368 * in the partition. The bit map will indicate uvhub numbers,
1369 * which are 0-N in a partition. Pnodes are unique system-wide. 1369 * which are 0-N in a partition. Pnodes are unique system-wide.
1370 */ 1370 */
1371 bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; 1371 bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
1372 bd2->header.dest_subnodeid = 0x10; /* the LB */ 1372 bd2->header.dest_subnodeid = 0x10; /* the LB */
1373 bd2->header.command = UV_NET_ENDPOINT_INTD; 1373 bd2->header.command = UV_NET_ENDPOINT_INTD;
1374 bd2->header.int_both = 1; 1374 bd2->header.int_both = 1;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 7b24460917d5..374a05d8ad22 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
131 unsigned long mmr_offset, int limit) 131 unsigned long mmr_offset, int limit)
132{ 132{
133 const struct cpumask *eligible_cpu = cpumask_of(cpu); 133 const struct cpumask *eligible_cpu = cpumask_of(cpu);
134 struct irq_cfg *cfg = get_irq_chip_data(irq); 134 struct irq_cfg *cfg = irq_get_chip_data(irq);
135 unsigned long mmr_value; 135 unsigned long mmr_value;
136 struct uv_IO_APIC_route_entry *entry; 136 struct uv_IO_APIC_route_entry *entry;
137 int mmr_pnode, err; 137 int mmr_pnode, err;
@@ -148,7 +148,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
148 else 148 else
149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 149 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
150 150
151 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, 151 irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
152 irq_name); 152 irq_name);
153 153
154 mmr_value = 0; 154 mmr_value = 0;
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index 632037671746..fe4cf8294878 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -569,11 +569,13 @@ out_unlock:
569static struct irqaction master_action = { 569static struct irqaction master_action = {
570 .handler = piix4_master_intr, 570 .handler = piix4_master_intr,
571 .name = "PIIX4-8259", 571 .name = "PIIX4-8259",
572 .flags = IRQF_NO_THREAD,
572}; 573};
573 574
574static struct irqaction cascade_action = { 575static struct irqaction cascade_action = {
575 .handler = no_action, 576 .handler = no_action,
576 .name = "cascade", 577 .name = "cascade",
578 .flags = IRQF_NO_THREAD,
577}; 579};
578 580
579static inline void set_piix4_virtual_irq_type(void) 581static inline void set_piix4_virtual_irq_type(void)
@@ -606,7 +608,7 @@ static void __init visws_pre_intr_init(void)
606 chip = &cobalt_irq_type; 608 chip = &cobalt_irq_type;
607 609
608 if (chip) 610 if (chip)
609 set_irq_chip(i, chip); 611 irq_set_chip(i, chip);
610 } 612 }
611 613
612 setup_irq(CO_IRQ_8259, &master_action); 614 setup_irq(CO_IRQ_8259, &master_action);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892e4bc3..e4343fe488ed 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
48 help 48 help
49 Enable statistics output and various tuning options in debugfs. 49 Enable statistics output and various tuning options in debugfs.
50 Enabling this option may incur a significant performance overhead. 50 Enabling this option may incur a significant performance overhead.
51
52config XEN_DEBUG
53 bool "Enable Xen debug checks"
54 depends on XEN
55 default n
56 help
57 Enable various WARN_ON checks in the Xen MMU code.
58 Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45fb..49dbd78ec3cb 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)
1284 1284
1285 xen_setup_features(); 1285 xen_setup_features();
1286 1286
1287 pv_info = xen_info; 1287 pv_info.name = "Xen HVM";
1288 pv_info.kernel_rpl = 0;
1289 1288
1290 xen_domain_type = XEN_HVM_DOMAIN; 1289 xen_domain_type = XEN_HVM_DOMAIN;
1291 1290
1292 return 0; 1291 return 0;
1293} 1292}
1294 1293
1295void xen_hvm_init_shared_info(void) 1294void __ref xen_hvm_init_shared_info(void)
1296{ 1295{
1297 int cpu; 1296 int cpu;
1298 struct xen_add_to_physmap xatp; 1297 struct xen_add_to_physmap xatp;
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
1331 switch (action) { 1330 switch (action) {
1332 case CPU_UP_PREPARE: 1331 case CPU_UP_PREPARE:
1333 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1332 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1333 if (xen_have_vector_callback)
1334 xen_init_lock_cpu(cpu);
1334 break; 1335 break;
1335 default: 1336 default:
1336 break; 1337 break;
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)
1355 1356
1356 if (xen_feature(XENFEAT_hvm_callback_vector)) 1357 if (xen_feature(XENFEAT_hvm_callback_vector))
1357 xen_have_vector_callback = 1; 1358 xen_have_vector_callback = 1;
1359 xen_hvm_smp_init();
1358 register_cpu_notifier(&xen_hvm_cpu_notifier); 1360 register_cpu_notifier(&xen_hvm_cpu_notifier);
1359 xen_unplug_emulated_devices(); 1361 xen_unplug_emulated_devices();
1360 have_vcpu_info_placement = 0; 1362 have_vcpu_info_placement = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad574..3f6f3347aa17 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/gfp.h> 47#include <linux/gfp.h>
48#include <linux/memblock.h> 48#include <linux/memblock.h>
49#include <linux/seq_file.h>
49 50
50#include <asm/pgtable.h> 51#include <asm/pgtable.h>
51#include <asm/tlbflush.h> 52#include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
416 if (val & _PAGE_PRESENT) { 417 if (val & _PAGE_PRESENT) {
417 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; 418 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
418 pteval_t flags = val & PTE_FLAGS_MASK; 419 pteval_t flags = val & PTE_FLAGS_MASK;
419 unsigned long mfn = pfn_to_mfn(pfn); 420 unsigned long mfn;
420 421
422 if (!xen_feature(XENFEAT_auto_translated_physmap))
423 mfn = get_phys_to_machine(pfn);
424 else
425 mfn = pfn;
421 /* 426 /*
422 * If there's no mfn for the pfn, then just create an 427 * If there's no mfn for the pfn, then just create an
423 * empty non-present pte. Unfortunately this loses 428 * empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
427 if (unlikely(mfn == INVALID_P2M_ENTRY)) { 432 if (unlikely(mfn == INVALID_P2M_ENTRY)) {
428 mfn = 0; 433 mfn = 0;
429 flags = 0; 434 flags = 0;
435 } else {
436 /*
437 * Paramount to do this test _after_ the
438 * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
439 * IDENTITY_FRAME_BIT resolves to true.
440 */
441 mfn &= ~FOREIGN_FRAME_BIT;
442 if (mfn & IDENTITY_FRAME_BIT) {
443 mfn &= ~IDENTITY_FRAME_BIT;
444 flags |= _PAGE_IOMAP;
445 }
430 } 446 }
431
432 val = ((pteval_t)mfn << PAGE_SHIFT) | flags; 447 val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
433 } 448 }
434 449
@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
532} 547}
533PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); 548PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
534 549
550#ifdef CONFIG_XEN_DEBUG
551pte_t xen_make_pte_debug(pteval_t pte)
552{
553 phys_addr_t addr = (pte & PTE_PFN_MASK);
554 phys_addr_t other_addr;
555 bool io_page = false;
556 pte_t _pte;
557
558 if (pte & _PAGE_IOMAP)
559 io_page = true;
560
561 _pte = xen_make_pte(pte);
562
563 if (!addr)
564 return _pte;
565
566 if (io_page &&
567 (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
568 other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
569 WARN(addr != other_addr,
570 "0x%lx is using VM_IO, but it is 0x%lx!\n",
571 (unsigned long)addr, (unsigned long)other_addr);
572 } else {
573 pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
574 other_addr = (_pte.pte & PTE_PFN_MASK);
575 WARN((addr == other_addr) && (!io_page) && (!iomap_set),
576 "0x%lx is missing VM_IO (and wasn't fixed)!\n",
577 (unsigned long)addr);
578 }
579
580 return _pte;
581}
582PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
583#endif
584
535pgd_t xen_make_pgd(pgdval_t pgd) 585pgd_t xen_make_pgd(pgdval_t pgd)
536{ 586{
537 pgd = pte_pfn_to_mfn(pgd); 587 pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
986 */ 1036 */
987void xen_mm_pin_all(void) 1037void xen_mm_pin_all(void)
988{ 1038{
989 unsigned long flags;
990 struct page *page; 1039 struct page *page;
991 1040
992 spin_lock_irqsave(&pgd_lock, flags); 1041 spin_lock(&pgd_lock);
993 1042
994 list_for_each_entry(page, &pgd_list, lru) { 1043 list_for_each_entry(page, &pgd_list, lru) {
995 if (!PagePinned(page)) { 1044 if (!PagePinned(page)) {
@@ -998,7 +1047,7 @@ void xen_mm_pin_all(void)
998 } 1047 }
999 } 1048 }
1000 1049
1001 spin_unlock_irqrestore(&pgd_lock, flags); 1050 spin_unlock(&pgd_lock);
1002} 1051}
1003 1052
1004/* 1053/*
@@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
1099 */ 1148 */
1100void xen_mm_unpin_all(void) 1149void xen_mm_unpin_all(void)
1101{ 1150{
1102 unsigned long flags;
1103 struct page *page; 1151 struct page *page;
1104 1152
1105 spin_lock_irqsave(&pgd_lock, flags); 1153 spin_lock(&pgd_lock);
1106 1154
1107 list_for_each_entry(page, &pgd_list, lru) { 1155 list_for_each_entry(page, &pgd_list, lru) {
1108 if (PageSavePinned(page)) { 1156 if (PageSavePinned(page)) {
@@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void)
1112 } 1160 }
1113 } 1161 }
1114 1162
1115 spin_unlock_irqrestore(&pgd_lock, flags); 1163 spin_unlock(&pgd_lock);
1116} 1164}
1117 1165
1118void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) 1166void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1443,7 +1491,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
1443 * early_ioremap fixmap slot, make sure it is RO. 1491 * early_ioremap fixmap slot, make sure it is RO.
1444 */ 1492 */
1445 if (!is_early_ioremap_ptep(ptep) && 1493 if (!is_early_ioremap_ptep(ptep) &&
1446 pfn >= e820_table_start && pfn < e820_table_end) 1494 pfn >= pgt_buf_start && pfn < pgt_buf_end)
1447 pte = pte_wrprotect(pte); 1495 pte = pte_wrprotect(pte);
1448 1496
1449 return pte; 1497 return pte;
@@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void)
1942 1990
1943static __init void xen_post_allocator_init(void) 1991static __init void xen_post_allocator_init(void)
1944{ 1992{
1993#ifdef CONFIG_XEN_DEBUG
1994 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
1995#endif
1945 pv_mmu_ops.set_pte = xen_set_pte; 1996 pv_mmu_ops.set_pte = xen_set_pte;
1946 pv_mmu_ops.set_pmd = xen_set_pmd; 1997 pv_mmu_ops.set_pmd = xen_set_pmd;
1947 pv_mmu_ops.set_pud = xen_set_pud; 1998 pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2074 in_frames[i] = virt_to_mfn(vaddr); 2125 in_frames[i] = virt_to_mfn(vaddr);
2075 2126
2076 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); 2127 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2077 set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); 2128 __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2078 2129
2079 if (out_frames) 2130 if (out_frames)
2080 out_frames[i] = virt_to_pfn(vaddr); 2131 out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
2353 2404
2354#ifdef CONFIG_XEN_DEBUG_FS 2405#ifdef CONFIG_XEN_DEBUG_FS
2355 2406
2407static int p2m_dump_open(struct inode *inode, struct file *filp)
2408{
2409 return single_open(filp, p2m_dump_show, NULL);
2410}
2411
2412static const struct file_operations p2m_dump_fops = {
2413 .open = p2m_dump_open,
2414 .read = seq_read,
2415 .llseek = seq_lseek,
2416 .release = single_release,
2417};
2418
2356static struct dentry *d_mmu_debug; 2419static struct dentry *d_mmu_debug;
2357 2420
2358static int __init xen_mmu_debugfs(void) 2421static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void)
2408 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, 2471 debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
2409 &mmu_stats.prot_commit_batched); 2472 &mmu_stats.prot_commit_batched);
2410 2473
2474 debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
2411 return 0; 2475 return 0;
2412} 2476}
2413fs_initcall(xen_mmu_debugfs); 2477fs_initcall(xen_mmu_debugfs);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7ce7ff9..215a3ce61068 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -23,6 +23,129 @@
23 * P2M_PER_PAGE depends on the architecture, as a mfn is always 23 * P2M_PER_PAGE depends on the architecture, as a mfn is always
24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to 24 * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
25 * 512 and 1024 entries respectively. 25 * 512 and 1024 entries respectively.
26 *
27 * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
28 *
29 * However not all entries are filled with MFNs. Specifically for all other
30 * leaf entries, or for the top root, or middle one, for which there is a void
31 * entry, we assume it is "missing". So (for example)
32 * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
33 *
34 * We also have the possibility of setting 1-1 mappings on certain regions, so
35 * that:
36 * pfn_to_mfn(0xc0000)=0xc0000
37 *
38 * The benefit of this is, that we can assume for non-RAM regions (think
39 * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
40 * get the PFN value to match the MFN.
41 *
42 * For this to work efficiently we have one new page p2m_identity and
43 * allocate (via reserved_brk) any other pages we need to cover the sides
44 * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
45 * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
46 * no other fancy value).
47 *
48 * On lookup we spot that the entry points to p2m_identity and return the
49 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
50 * If the entry points to an allocated page, we just proceed as before and
51 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
52 * appropriate functions (pfn_to_mfn).
53 *
54 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
55 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
56 * non-identity pfn. To protect ourselves against we elect to set (and get) the
57 * IDENTITY_FRAME_BIT on all identity mapped PFNs.
58 *
59 * This simplistic diagram is used to explain the more subtle piece of code.
60 * There is also a digram of the P2M at the end that can help.
61 * Imagine your E820 looking as so:
62 *
63 * 1GB 2GB
64 * /-------------------+---------\/----\ /----------\ /---+-----\
65 * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
66 * \-------------------+---------/\----/ \----------/ \---+-----/
67 * ^- 1029MB ^- 2001MB
68 *
69 * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
70 * 2048MB = 524288 (0x80000)]
71 *
72 * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
73 * is actually not present (would have to kick the balloon driver to put it in).
74 *
75 * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
76 * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
77 * of the PFN and the end PFN (263424 and 512256 respectively). The first step
78 * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
79 * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
80 * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
81 * to end pfn. We reserve_brk top leaf pages if they are missing (means they
82 * point to p2m_mid_missing).
83 *
84 * With the E820 example above, 263424 is not 1GB aligned so we allocate a
85 * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
86 * Each entry in the allocate page is "missing" (points to p2m_missing).
87 *
88 * Next stage is to determine if we need to do a more granular boundary check
89 * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
90 * We check if the start pfn and end pfn violate that boundary check, and if
91 * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
92 * granularity of setting which PFNs are missing and which ones are identity.
93 * In our example 263424 and 512256 both fail the check so we reserve_brk two
94 * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
95 * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
96 *
97 * At this point we would at minimum reserve_brk one page, but could be up to
98 * three. Each call to set_phys_range_identity has at maximum a three page
99 * cost. If we were to query the P2M at this stage, all those entries from
100 * start PFN through end PFN (so 1029MB -> 2001MB) would return
101 * INVALID_P2M_ENTRY ("missing").
102 *
103 * The next step is to walk from the start pfn to the end pfn setting
104 * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
105 * If we find that the middle leaf is pointing to p2m_missing we can swap it
106 * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
107 * point we do not need to worry about boundary aligment (so no need to
108 * reserve_brk a middle page, figure out which PFNs are "missing" and which
109 * ones are identity), as that has been done earlier. If we find that the
110 * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
111 * that page (which covers 512 PFNs) and set the appropriate PFN with
112 * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
113 * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
114 * IDENTITY_FRAME_BIT set.
115 *
116 * All other regions that are void (or not filled) either point to p2m_missing
117 * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
118 * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
119 * contain the INVALID_P2M_ENTRY value and are considered "missing."
120 *
121 * This is what the p2m ends up looking (for the E820 above) with this
122 * fabulous drawing:
123 *
124 * p2m /--------------\
125 * /-----\ | &mfn_list[0],| /-----------------\
126 * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
127 * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
128 * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
129 * |-----| \ | [p2m_identity]+\\ | .... |
130 * | 2 |--\ \-------------------->| ... | \\ \----------------/
131 * |-----| \ \---------------/ \\
132 * | 3 |\ \ \\ p2m_identity
133 * |-----| \ \-------------------->/---------------\ /-----------------\
134 * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
135 * \-----/ / | [p2m_identity]+-->| ..., ~0 |
136 * / /---------------\ | .... | \-----------------/
137 * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
138 * / | IDENTITY[@256]|<----/ \---------------/
139 * / | ~0, ~0, .... |
140 * | \---------------/
141 * |
142 * p2m_missing p2m_missing
143 * /------------------\ /------------\
144 * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
145 * | [p2m_mid_missing]+---->| ..., ~0 |
146 * \------------------/ \------------/
147 *
148 * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
26 */ 149 */
27 150
28#include <linux/init.h> 151#include <linux/init.h>
@@ -30,6 +153,7 @@
30#include <linux/list.h> 153#include <linux/list.h>
31#include <linux/hash.h> 154#include <linux/hash.h>
32#include <linux/sched.h> 155#include <linux/sched.h>
156#include <linux/seq_file.h>
33 157
34#include <asm/cache.h> 158#include <asm/cache.h>
35#include <asm/setup.h> 159#include <asm/setup.h>
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
59static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); 183static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
60static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); 184static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
61 185
186static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
187
62RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 188RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
63RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); 189RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
64 190
191/* We might hit two boundary violations at the start and end, at max each
192 * boundary violation will require three middle nodes. */
193RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
194
65static inline unsigned p2m_top_index(unsigned long pfn) 195static inline unsigned p2m_top_index(unsigned long pfn)
66{ 196{
67 BUG_ON(pfn >= MAX_P2M_PFN); 197 BUG_ON(pfn >= MAX_P2M_PFN);
@@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m)
136 * - After resume we're called from within stop_machine, but the mfn 266 * - After resume we're called from within stop_machine, but the mfn
137 * tree should alreay be completely allocated. 267 * tree should alreay be completely allocated.
138 */ 268 */
139void xen_build_mfn_list_list(void) 269void __ref xen_build_mfn_list_list(void)
140{ 270{
141 unsigned long pfn; 271 unsigned long pfn;
142 272
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
221 p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); 351 p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
222 p2m_top_init(p2m_top); 352 p2m_top_init(p2m_top);
223 353
354 p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
355 p2m_init(p2m_identity);
356
224 /* 357 /*
225 * The domain builder gives us a pre-constructed p2m array in 358 * The domain builder gives us a pre-constructed p2m array in
226 * mfn_list for all the pages initially given to us, so we just 359 * mfn_list for all the pages initially given to us, so we just
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
266 mididx = p2m_mid_index(pfn); 399 mididx = p2m_mid_index(pfn);
267 idx = p2m_index(pfn); 400 idx = p2m_index(pfn);
268 401
402 /*
403 * The INVALID_P2M_ENTRY is filled in both p2m_*identity
404 * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
405 * would be wrong.
406 */
407 if (p2m_top[topidx][mididx] == p2m_identity)
408 return IDENTITY_FRAME(pfn);
409
269 return p2m_top[topidx][mididx][idx]; 410 return p2m_top[topidx][mididx][idx];
270} 411}
271EXPORT_SYMBOL_GPL(get_phys_to_machine); 412EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn)
335 p2m_top_mfn_p[topidx] = mid_mfn; 476 p2m_top_mfn_p[topidx] = mid_mfn;
336 } 477 }
337 478
338 if (p2m_top[topidx][mididx] == p2m_missing) { 479 if (p2m_top[topidx][mididx] == p2m_identity ||
480 p2m_top[topidx][mididx] == p2m_missing) {
339 /* p2m leaf page is missing */ 481 /* p2m leaf page is missing */
340 unsigned long *p2m; 482 unsigned long *p2m;
483 unsigned long *p2m_orig = p2m_top[topidx][mididx];
341 484
342 p2m = alloc_p2m_page(); 485 p2m = alloc_p2m_page();
343 if (!p2m) 486 if (!p2m)
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn)
345 488
346 p2m_init(p2m); 489 p2m_init(p2m);
347 490
348 if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) 491 if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
349 free_p2m_page(p2m); 492 free_p2m_page(p2m);
350 else 493 else
351 mid_mfn[mididx] = virt_to_mfn(p2m); 494 mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn)
354 return true; 497 return true;
355} 498}
356 499
500bool __early_alloc_p2m(unsigned long pfn)
501{
502 unsigned topidx, mididx, idx;
503
504 topidx = p2m_top_index(pfn);
505 mididx = p2m_mid_index(pfn);
506 idx = p2m_index(pfn);
507
508 /* Pfff.. No boundary cross-over, lets get out. */
509 if (!idx)
510 return false;
511
512 WARN(p2m_top[topidx][mididx] == p2m_identity,
513 "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
514 topidx, mididx);
515
516 /*
517 * Could be done by xen_build_dynamic_phys_to_machine..
518 */
519 if (p2m_top[topidx][mididx] != p2m_missing)
520 return false;
521
522 /* Boundary cross-over for the edges: */
523 if (idx) {
524 unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
525
526 p2m_init(p2m);
527
528 p2m_top[topidx][mididx] = p2m;
529
530 }
531 return idx != 0;
532}
533unsigned long set_phys_range_identity(unsigned long pfn_s,
534 unsigned long pfn_e)
535{
536 unsigned long pfn;
537
538 if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
539 return 0;
540
541 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
542 return pfn_e - pfn_s;
543
544 if (pfn_s > pfn_e)
545 return 0;
546
547 for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
548 pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
549 pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
550 {
551 unsigned topidx = p2m_top_index(pfn);
552 if (p2m_top[topidx] == p2m_mid_missing) {
553 unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
554
555 p2m_mid_init(mid);
556
557 p2m_top[topidx] = mid;
558 }
559 }
560
561 __early_alloc_p2m(pfn_s);
562 __early_alloc_p2m(pfn_e);
563
564 for (pfn = pfn_s; pfn < pfn_e; pfn++)
565 if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
566 break;
567
568 if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
569 "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
570 (pfn_e - pfn_s) - (pfn - pfn_s)))
571 printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
572
573 return pfn - pfn_s;
574}
575
357/* Try to install p2m mapping; fail if intermediate bits missing */ 576/* Try to install p2m mapping; fail if intermediate bits missing */
358bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) 577bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
359{ 578{
360 unsigned topidx, mididx, idx; 579 unsigned topidx, mididx, idx;
361 580
581 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
582 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
583 return true;
584 }
362 if (unlikely(pfn >= MAX_P2M_PFN)) { 585 if (unlikely(pfn >= MAX_P2M_PFN)) {
363 BUG_ON(mfn != INVALID_P2M_ENTRY); 586 BUG_ON(mfn != INVALID_P2M_ENTRY);
364 return true; 587 return true;
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
368 mididx = p2m_mid_index(pfn); 591 mididx = p2m_mid_index(pfn);
369 idx = p2m_index(pfn); 592 idx = p2m_index(pfn);
370 593
594 /* For sparse holes were the p2m leaf has real PFN along with
595 * PCI holes, stick in the PFN as the MFN value.
596 */
597 if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
598 if (p2m_top[topidx][mididx] == p2m_identity)
599 return true;
600
601 /* Swap over from MISSING to IDENTITY if needed. */
602 if (p2m_top[topidx][mididx] == p2m_missing) {
603 WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
604 p2m_identity) != p2m_missing);
605 return true;
606 }
607 }
608
371 if (p2m_top[topidx][mididx] == p2m_missing) 609 if (p2m_top[topidx][mididx] == p2m_missing)
372 return mfn == INVALID_P2M_ENTRY; 610 return mfn == INVALID_P2M_ENTRY;
373 611
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
378 616
379bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) 617bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
380{ 618{
381 if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
382 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
383 return true;
384 }
385
386 if (unlikely(!__set_phys_to_machine(pfn, mfn))) { 619 if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
387 if (!alloc_p2m(pfn)) 620 if (!alloc_p2m(pfn))
388 return false; 621 return false;
@@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
421{ 654{
422 unsigned long flags; 655 unsigned long flags;
423 unsigned long pfn; 656 unsigned long pfn;
424 unsigned long address; 657 unsigned long uninitialized_var(address);
425 unsigned level; 658 unsigned level;
426 pte_t *ptep = NULL; 659 pte_t *ptep = NULL;
427 660
@@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page)
455 unsigned long flags; 688 unsigned long flags;
456 unsigned long mfn; 689 unsigned long mfn;
457 unsigned long pfn; 690 unsigned long pfn;
458 unsigned long address; 691 unsigned long uninitialized_var(address);
459 unsigned level; 692 unsigned level;
460 pte_t *ptep = NULL; 693 pte_t *ptep = NULL;
461 694
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
520 return ret; 753 return ret;
521} 754}
522EXPORT_SYMBOL_GPL(m2p_find_override_pfn); 755EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
756
757#ifdef CONFIG_XEN_DEBUG_FS
758
759int p2m_dump_show(struct seq_file *m, void *v)
760{
761 static const char * const level_name[] = { "top", "middle",
762 "entry", "abnormal" };
763 static const char * const type_name[] = { "identity", "missing",
764 "pfn", "abnormal"};
765#define TYPE_IDENTITY 0
766#define TYPE_MISSING 1
767#define TYPE_PFN 2
768#define TYPE_UNKNOWN 3
769 unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
770 unsigned int uninitialized_var(prev_level);
771 unsigned int uninitialized_var(prev_type);
772
773 if (!p2m_top)
774 return 0;
775
776 for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
777 unsigned topidx = p2m_top_index(pfn);
778 unsigned mididx = p2m_mid_index(pfn);
779 unsigned idx = p2m_index(pfn);
780 unsigned lvl, type;
781
782 lvl = 4;
783 type = TYPE_UNKNOWN;
784 if (p2m_top[topidx] == p2m_mid_missing) {
785 lvl = 0; type = TYPE_MISSING;
786 } else if (p2m_top[topidx] == NULL) {
787 lvl = 0; type = TYPE_UNKNOWN;
788 } else if (p2m_top[topidx][mididx] == NULL) {
789 lvl = 1; type = TYPE_UNKNOWN;
790 } else if (p2m_top[topidx][mididx] == p2m_identity) {
791 lvl = 1; type = TYPE_IDENTITY;
792 } else if (p2m_top[topidx][mididx] == p2m_missing) {
793 lvl = 1; type = TYPE_MISSING;
794 } else if (p2m_top[topidx][mididx][idx] == 0) {
795 lvl = 2; type = TYPE_UNKNOWN;
796 } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
797 lvl = 2; type = TYPE_IDENTITY;
798 } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
799 lvl = 2; type = TYPE_MISSING;
800 } else if (p2m_top[topidx][mididx][idx] == pfn) {
801 lvl = 2; type = TYPE_PFN;
802 } else if (p2m_top[topidx][mididx][idx] != pfn) {
803 lvl = 2; type = TYPE_PFN;
804 }
805 if (pfn == 0) {
806 prev_level = lvl;
807 prev_type = type;
808 }
809 if (pfn == MAX_DOMAIN_PAGES-1) {
810 lvl = 3;
811 type = TYPE_UNKNOWN;
812 }
813 if (prev_type != type) {
814 seq_printf(m, " [0x%lx->0x%lx] %s\n",
815 prev_pfn_type, pfn, type_name[prev_type]);
816 prev_pfn_type = pfn;
817 prev_type = type;
818 }
819 if (prev_level != lvl) {
820 seq_printf(m, " [0x%lx->0x%lx] level %s\n",
821 prev_pfn_level, pfn, level_name[prev_level]);
822 prev_pfn_level = pfn;
823 prev_level = lvl;
824 }
825 }
826 return 0;
827#undef TYPE_IDENTITY
828#undef TYPE_MISSING
829#undef TYPE_PFN
830#undef TYPE_UNKNOWN
831}
832#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a8a66a50d446..fa0269a99377 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
52 52
53static __init void xen_add_extra_mem(unsigned long pages) 53static __init void xen_add_extra_mem(unsigned long pages)
54{ 54{
55 unsigned long pfn;
56
55 u64 size = (u64)pages * PAGE_SIZE; 57 u64 size = (u64)pages * PAGE_SIZE;
56 u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; 58 u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
57 59
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
66 xen_extra_mem_size += size; 68 xen_extra_mem_size += size;
67 69
68 xen_max_p2m_pfn = PFN_DOWN(extra_start + size); 70 xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
71
72 for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
73 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
69} 74}
70 75
71static unsigned long __init xen_release_chunk(phys_addr_t start_addr, 76static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
104 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", 109 WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
105 start, end, ret); 110 start, end, ret);
106 if (ret == 1) { 111 if (ret == 1) {
107 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 112 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
108 len++; 113 len++;
109 } 114 }
110 } 115 }
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
138 return released; 143 return released;
139} 144}
140 145
146static unsigned long __init xen_set_identity(const struct e820entry *list,
147 ssize_t map_size)
148{
149 phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
150 phys_addr_t start_pci = last;
151 const struct e820entry *entry;
152 unsigned long identity = 0;
153 int i;
154
155 for (i = 0, entry = list; i < map_size; i++, entry++) {
156 phys_addr_t start = entry->addr;
157 phys_addr_t end = start + entry->size;
158
159 if (start < last)
160 start = last;
161
162 if (end <= start)
163 continue;
164
165 /* Skip over the 1MB region. */
166 if (last > end)
167 continue;
168
169 if (entry->type == E820_RAM) {
170 if (start > start_pci)
171 identity += set_phys_range_identity(
172 PFN_UP(start_pci), PFN_DOWN(start));
173
174 /* Without saving 'last' we would gooble RAM too
175 * at the end of the loop. */
176 last = end;
177 start_pci = end;
178 continue;
179 }
180 start_pci = min(start, start_pci);
181 last = end;
182 }
183 if (last > start_pci)
184 identity += set_phys_range_identity(
185 PFN_UP(start_pci), PFN_DOWN(last));
186 return identity;
187}
141/** 188/**
142 * machine_specific_memory_setup - Hook for machine specific memory setup. 189 * machine_specific_memory_setup - Hook for machine specific memory setup.
143 **/ 190 **/
144char * __init xen_memory_setup(void) 191char * __init xen_memory_setup(void)
145{ 192{
146 static struct e820entry map[E820MAX] __initdata; 193 static struct e820entry map[E820MAX] __initdata;
194 static struct e820entry map_raw[E820MAX] __initdata;
147 195
148 unsigned long max_pfn = xen_start_info->nr_pages; 196 unsigned long max_pfn = xen_start_info->nr_pages;
149 unsigned long long mem_end; 197 unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
151 struct xen_memory_map memmap; 199 struct xen_memory_map memmap;
152 unsigned long extra_pages = 0; 200 unsigned long extra_pages = 0;
153 unsigned long extra_limit; 201 unsigned long extra_limit;
202 unsigned long identity_pages = 0;
154 int i; 203 int i;
155 int op; 204 int op;
156 205
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
176 } 225 }
177 BUG_ON(rc); 226 BUG_ON(rc);
178 227
228 memcpy(map_raw, map, sizeof(map));
179 e820.nr_map = 0; 229 e820.nr_map = 0;
180 xen_extra_mem_start = mem_end; 230 xen_extra_mem_start = mem_end;
181 for (i = 0; i < memmap.nr_entries; i++) { 231 for (i = 0; i < memmap.nr_entries; i++) {
@@ -194,6 +244,15 @@ char * __init xen_memory_setup(void)
194 end -= delta; 244 end -= delta;
195 245
196 extra_pages += PFN_DOWN(delta); 246 extra_pages += PFN_DOWN(delta);
247 /*
248 * Set RAM below 4GB that is not for us to be unusable.
249 * This prevents "System RAM" address space from being
250 * used as potential resource for I/O address (happens
251 * when 'allocate_resource' is called).
252 */
253 if (delta &&
254 (xen_initial_domain() && end < 0x100000000ULL))
255 e820_add_region(end, delta, E820_UNUSABLE);
197 } 256 }
198 257
199 if (map[i].size > 0 && end > xen_extra_mem_start) 258 if (map[i].size > 0 && end > xen_extra_mem_start)
@@ -251,6 +310,13 @@ char * __init xen_memory_setup(void)
251 310
252 xen_add_extra_mem(extra_pages); 311 xen_add_extra_mem(extra_pages);
253 312
313 /*
314 * Set P2M for all non-RAM pages and E820 gaps to be identity
315 * type PFNs. We supply it with the non-sanitized version
316 * of the E820.
317 */
318 identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
319 printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
254 return "Xen"; 320 return "Xen";
255} 321}
256 322
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72a4c7959045..30612441ed99 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
509 xen_fill_possible_map(); 509 xen_fill_possible_map();
510 xen_init_spinlocks(); 510 xen_init_spinlocks();
511} 511}
512
513static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
514{
515 native_smp_prepare_cpus(max_cpus);
516 WARN_ON(xen_smp_intr_init(0));
517
518 if (!xen_have_vector_callback)
519 return;
520 xen_init_lock_cpu(0);
521 xen_init_spinlocks();
522}
523
524static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
525{
526 int rc;
527 rc = native_cpu_up(cpu);
528 WARN_ON (xen_smp_intr_init(cpu));
529 return rc;
530}
531
532static void xen_hvm_cpu_die(unsigned int cpu)
533{
534 unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
535 unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
536 unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
537 unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
538 native_cpu_die(cpu);
539}
540
541void __init xen_hvm_smp_init(void)
542{
543 smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
544 smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
545 smp_ops.cpu_up = xen_hvm_cpu_up;
546 smp_ops.cpu_die = xen_hvm_cpu_die;
547 smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
548 smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
549}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 9bbd63a129b5..45329c8c226e 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,7 +12,7 @@
12#include "xen-ops.h" 12#include "xen-ops.h"
13#include "mmu.h" 13#include "mmu.h"
14 14
15void xen_pre_suspend(void) 15void xen_arch_pre_suspend(void)
16{ 16{
17 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); 17 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
18 xen_start_info->console.domU.mfn = 18 xen_start_info->console.domU.mfn =
@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
26 BUG(); 26 BUG();
27} 27}
28 28
29void xen_hvm_post_suspend(int suspend_cancelled) 29void xen_arch_hvm_post_suspend(int suspend_cancelled)
30{ 30{
31#ifdef CONFIG_XEN_PVHVM
31 int cpu; 32 int cpu;
32 xen_hvm_init_shared_info(); 33 xen_hvm_init_shared_info();
33 xen_callback_vector(); 34 xen_callback_vector();
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
37 xen_setup_runstate_info(cpu); 38 xen_setup_runstate_info(cpu);
38 } 39 }
39 } 40 }
41#endif
40} 42}
41 43
42void xen_post_suspend(int suspend_cancelled) 44void xen_arch_post_suspend(int suspend_cancelled)
43{ 45{
44 xen_build_mfn_list_list(); 46 xen_build_mfn_list_list();
45 47
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e3d6a5..2e2d370a47b1 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
397 name = "<timer kasprintf failed>"; 397 name = "<timer kasprintf failed>";
398 398
399 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, 399 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
400 IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, 400 IRQF_DISABLED|IRQF_PERCPU|
401 IRQF_NOBALANCING|IRQF_TIMER|
402 IRQF_FORCE_RESUME,
401 name, NULL); 403 name, NULL);
402 404
403 evt = &per_cpu(xen_clock_events, cpu); 405 evt = &per_cpu(xen_clock_events, cpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1a5ff24e29c0..aaa7291c9259 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,9 +28,9 @@ ENTRY(startup_xen)
28 __FINIT 28 __FINIT
29 29
30.pushsection .text 30.pushsection .text
31 .align PAGE_SIZE_asm 31 .align PAGE_SIZE
32ENTRY(hypercall_page) 32ENTRY(hypercall_page)
33 .skip PAGE_SIZE_asm 33 .skip PAGE_SIZE
34.popsection 34.popsection
35 35
36 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") 36 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9d41bf985757..3112f55638c4 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);
64 64
65#ifdef CONFIG_SMP 65#ifdef CONFIG_SMP
66void xen_smp_init(void); 66void xen_smp_init(void);
67void __init xen_hvm_smp_init(void);
67 68
68extern cpumask_var_t xen_cpu_initialized_map; 69extern cpumask_var_t xen_cpu_initialized_map;
69#else 70#else
70static inline void xen_smp_init(void) {} 71static inline void xen_smp_init(void) {}
72static inline void xen_hvm_smp_init(void) {}
71#endif 73#endif
72 74
73#ifdef CONFIG_PARAVIRT_SPINLOCKS 75#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
index e39edf5c86f2..249619e7e7f2 100644
--- a/arch/xtensa/include/asm/rwsem.h
+++ b/arch/xtensa/include/asm/rwsem.h
@@ -17,44 +17,12 @@
17#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." 17#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
18#endif 18#endif
19 19
20#include <linux/list.h>
21#include <linux/spinlock.h>
22#include <asm/atomic.h>
23#include <asm/system.h>
24
25/*
26 * the semaphore definition
27 */
28struct rw_semaphore {
29 signed long count;
30#define RWSEM_UNLOCKED_VALUE 0x00000000 20#define RWSEM_UNLOCKED_VALUE 0x00000000
31#define RWSEM_ACTIVE_BIAS 0x00000001 21#define RWSEM_ACTIVE_BIAS 0x00000001
32#define RWSEM_ACTIVE_MASK 0x0000ffff 22#define RWSEM_ACTIVE_MASK 0x0000ffff
33#define RWSEM_WAITING_BIAS (-0x00010000) 23#define RWSEM_WAITING_BIAS (-0x00010000)
34#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 24#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
35#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 25#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
36 spinlock_t wait_lock;
37 struct list_head wait_list;
38};
39
40#define __RWSEM_INITIALIZER(name) \
41 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
42 LIST_HEAD_INIT((name).wait_list) }
43
44#define DECLARE_RWSEM(name) \
45 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
46
47extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
48extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
49extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
50extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
51
52static inline void init_rwsem(struct rw_semaphore *sem)
53{
54 sem->count = RWSEM_UNLOCKED_VALUE;
55 spin_lock_init(&sem->wait_lock);
56 INIT_LIST_HEAD(&sem->wait_list);
57}
58 26
59/* 27/*
60 * lock for reading 28 * lock for reading
@@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
160 return atomic_add_return(delta, (atomic_t *)(&sem->count)); 128 return atomic_add_return(delta, (atomic_t *)(&sem->count));
161} 129}
162 130
163static inline int rwsem_is_locked(struct rw_semaphore *sem)
164{
165 return (sem->count != 0);
166}
167
168#endif /* _XTENSA_RWSEM_H */ 131#endif /* _XTENSA_RWSEM_H */
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 19df764f6399..f3e5eb43f71c 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -96,16 +96,12 @@ again:
96 update_process_times(user_mode(get_irq_regs())); 96 update_process_times(user_mode(get_irq_regs()));
97#endif 97#endif
98 98
99 write_seqlock(&xtime_lock); 99 xtime_update(1); /* Linux handler in kernel/time/timekeeping */
100
101 do_timer(1); /* Linux handler in kernel/timer.c */
102 100
103 /* Note that writing CCOMPARE clears the interrupt. */ 101 /* Note that writing CCOMPARE clears the interrupt. */
104 102
105 next += CCOUNT_PER_JIFFY; 103 next += CCOUNT_PER_JIFFY;
106 set_linux_timer(next); 104 set_linux_timer(next);
107
108 write_sequnlock(&xtime_lock);
109 } 105 }
110 106
111 /* Allow platform to do something useful (Wdog). */ 107 /* Allow platform to do something useful (Wdog). */
diff --git a/block/blk-core.c b/block/blk-core.c
index 2f4002f79a24..518dd423a5fe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -352,7 +352,7 @@ void blk_start_queue(struct request_queue *q)
352 WARN_ON(!irqs_disabled()); 352 WARN_ON(!irqs_disabled());
353 353
354 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 354 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
355 __blk_run_queue(q); 355 __blk_run_queue(q, false);
356} 356}
357EXPORT_SYMBOL(blk_start_queue); 357EXPORT_SYMBOL(blk_start_queue);
358 358
@@ -403,13 +403,14 @@ EXPORT_SYMBOL(blk_sync_queue);
403/** 403/**
404 * __blk_run_queue - run a single device queue 404 * __blk_run_queue - run a single device queue
405 * @q: The queue to run 405 * @q: The queue to run
406 * @force_kblockd: Don't run @q->request_fn directly. Use kblockd.
406 * 407 *
407 * Description: 408 * Description:
408 * See @blk_run_queue. This variant must be called with the queue lock 409 * See @blk_run_queue. This variant must be called with the queue lock
409 * held and interrupts disabled. 410 * held and interrupts disabled.
410 * 411 *
411 */ 412 */
412void __blk_run_queue(struct request_queue *q) 413void __blk_run_queue(struct request_queue *q, bool force_kblockd)
413{ 414{
414 blk_remove_plug(q); 415 blk_remove_plug(q);
415 416
@@ -423,7 +424,7 @@ void __blk_run_queue(struct request_queue *q)
423 * Only recurse once to avoid overrunning the stack, let the unplug 424 * Only recurse once to avoid overrunning the stack, let the unplug
424 * handling reinvoke the handler shortly if we already got there. 425 * handling reinvoke the handler shortly if we already got there.
425 */ 426 */
426 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { 427 if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
427 q->request_fn(q); 428 q->request_fn(q);
428 queue_flag_clear(QUEUE_FLAG_REENTER, q); 429 queue_flag_clear(QUEUE_FLAG_REENTER, q);
429 } else { 430 } else {
@@ -446,7 +447,7 @@ void blk_run_queue(struct request_queue *q)
446 unsigned long flags; 447 unsigned long flags;
447 448
448 spin_lock_irqsave(q->queue_lock, flags); 449 spin_lock_irqsave(q->queue_lock, flags);
449 __blk_run_queue(q); 450 __blk_run_queue(q, false);
450 spin_unlock_irqrestore(q->queue_lock, flags); 451 spin_unlock_irqrestore(q->queue_lock, flags);
451} 452}
452EXPORT_SYMBOL(blk_run_queue); 453EXPORT_SYMBOL(blk_run_queue);
@@ -1053,7 +1054,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
1053 1054
1054 drive_stat_acct(rq, 1); 1055 drive_stat_acct(rq, 1);
1055 __elv_add_request(q, rq, where, 0); 1056 __elv_add_request(q, rq, where, 0);
1056 __blk_run_queue(q); 1057 __blk_run_queue(q, false);
1057 spin_unlock_irqrestore(q->queue_lock, flags); 1058 spin_unlock_irqrestore(q->queue_lock, flags);
1058} 1059}
1059EXPORT_SYMBOL(blk_insert_request); 1060EXPORT_SYMBOL(blk_insert_request);
@@ -2610,13 +2611,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2610} 2611}
2611EXPORT_SYMBOL(kblockd_schedule_work); 2612EXPORT_SYMBOL(kblockd_schedule_work);
2612 2613
2613int kblockd_schedule_delayed_work(struct request_queue *q,
2614 struct delayed_work *dwork, unsigned long delay)
2615{
2616 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2617}
2618EXPORT_SYMBOL(kblockd_schedule_delayed_work);
2619
2620int __init blk_dev_init(void) 2614int __init blk_dev_init(void)
2621{ 2615{
2622 BUILD_BUG_ON(__REQ_NR_BITS > 8 * 2616 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 54b123d6563e..b27d0208611b 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -66,10 +66,12 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q,
66 66
67 /* 67 /*
68 * Moving a request silently to empty queue_head may stall the 68 * Moving a request silently to empty queue_head may stall the
69 * queue. Kick the queue in those cases. 69 * queue. Kick the queue in those cases. This function is called
70 * from request completion path and calling directly into
71 * request_fn may confuse the driver. Always use kblockd.
70 */ 72 */
71 if (was_empty && next_rq) 73 if (was_empty && next_rq)
72 __blk_run_queue(q); 74 __blk_run_queue(q, true);
73} 75}
74 76
75static void pre_flush_end_io(struct request *rq, int error) 77static void pre_flush_end_io(struct request *rq, int error)
@@ -130,7 +132,7 @@ static struct request *queue_next_fseq(struct request_queue *q)
130 BUG(); 132 BUG();
131 } 133 }
132 134
133 elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 135 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
134 return rq; 136 return rq;
135} 137}
136 138
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 1a320d2406b0..bd3e8df4d5e2 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -109,7 +109,6 @@ struct bio_batch
109 atomic_t done; 109 atomic_t done;
110 unsigned long flags; 110 unsigned long flags;
111 struct completion *wait; 111 struct completion *wait;
112 bio_end_io_t *end_io;
113}; 112};
114 113
115static void bio_batch_end_io(struct bio *bio, int err) 114static void bio_batch_end_io(struct bio *bio, int err)
@@ -122,17 +121,14 @@ static void bio_batch_end_io(struct bio *bio, int err)
122 else 121 else
123 clear_bit(BIO_UPTODATE, &bb->flags); 122 clear_bit(BIO_UPTODATE, &bb->flags);
124 } 123 }
125 if (bb) { 124 if (bb)
126 if (bb->end_io) 125 if (atomic_dec_and_test(&bb->done))
127 bb->end_io(bio, err); 126 complete(bb->wait);
128 atomic_inc(&bb->done);
129 complete(bb->wait);
130 }
131 bio_put(bio); 127 bio_put(bio);
132} 128}
133 129
134/** 130/**
135 * blkdev_issue_zeroout generate number of zero filed write bios 131 * blkdev_issue_zeroout - generate number of zero filed write bios
136 * @bdev: blockdev to issue 132 * @bdev: blockdev to issue
137 * @sector: start sector 133 * @sector: start sector
138 * @nr_sects: number of sectors to write 134 * @nr_sects: number of sectors to write
@@ -150,13 +146,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
150 int ret; 146 int ret;
151 struct bio *bio; 147 struct bio *bio;
152 struct bio_batch bb; 148 struct bio_batch bb;
153 unsigned int sz, issued = 0; 149 unsigned int sz;
154 DECLARE_COMPLETION_ONSTACK(wait); 150 DECLARE_COMPLETION_ONSTACK(wait);
155 151
156 atomic_set(&bb.done, 0); 152 atomic_set(&bb.done, 1);
157 bb.flags = 1 << BIO_UPTODATE; 153 bb.flags = 1 << BIO_UPTODATE;
158 bb.wait = &wait; 154 bb.wait = &wait;
159 bb.end_io = NULL;
160 155
161submit: 156submit:
162 ret = 0; 157 ret = 0;
@@ -185,12 +180,12 @@ submit:
185 break; 180 break;
186 } 181 }
187 ret = 0; 182 ret = 0;
188 issued++; 183 atomic_inc(&bb.done);
189 submit_bio(WRITE, bio); 184 submit_bio(WRITE, bio);
190 } 185 }
191 186
192 /* Wait for bios in-flight */ 187 /* Wait for bios in-flight */
193 while (issued != atomic_read(&bb.done)) 188 if (!atomic_dec_and_test(&bb.done))
194 wait_for_completion(&wait); 189 wait_for_completion(&wait);
195 190
196 if (!test_bit(BIO_UPTODATE, &bb.flags)) 191 if (!test_bit(BIO_UPTODATE, &bb.flags))
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a89043a3caa4..e36cc10a346c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -20,6 +20,11 @@ static int throtl_quantum = 32;
20/* Throttling is performed over 100ms slice and after that slice is renewed */ 20/* Throttling is performed over 100ms slice and after that slice is renewed */
21static unsigned long throtl_slice = HZ/10; /* 100 ms */ 21static unsigned long throtl_slice = HZ/10; /* 100 ms */
22 22
23/* A workqueue to queue throttle related work */
24static struct workqueue_struct *kthrotld_workqueue;
25static void throtl_schedule_delayed_work(struct throtl_data *td,
26 unsigned long delay);
27
23struct throtl_rb_root { 28struct throtl_rb_root {
24 struct rb_root rb; 29 struct rb_root rb;
25 struct rb_node *left; 30 struct rb_node *left;
@@ -345,10 +350,9 @@ static void throtl_schedule_next_dispatch(struct throtl_data *td)
345 update_min_dispatch_time(st); 350 update_min_dispatch_time(st);
346 351
347 if (time_before_eq(st->min_disptime, jiffies)) 352 if (time_before_eq(st->min_disptime, jiffies))
348 throtl_schedule_delayed_work(td->queue, 0); 353 throtl_schedule_delayed_work(td, 0);
349 else 354 else
350 throtl_schedule_delayed_work(td->queue, 355 throtl_schedule_delayed_work(td, (st->min_disptime - jiffies));
351 (st->min_disptime - jiffies));
352} 356}
353 357
354static inline void 358static inline void
@@ -815,10 +819,10 @@ void blk_throtl_work(struct work_struct *work)
815} 819}
816 820
817/* Call with queue lock held */ 821/* Call with queue lock held */
818void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) 822static void
823throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
819{ 824{
820 825
821 struct throtl_data *td = q->td;
822 struct delayed_work *dwork = &td->throtl_work; 826 struct delayed_work *dwork = &td->throtl_work;
823 827
824 if (total_nr_queued(td) > 0) { 828 if (total_nr_queued(td) > 0) {
@@ -827,12 +831,11 @@ void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay)
827 * Cancel that and schedule a new one. 831 * Cancel that and schedule a new one.
828 */ 832 */
829 __cancel_delayed_work(dwork); 833 __cancel_delayed_work(dwork);
830 kblockd_schedule_delayed_work(q, dwork, delay); 834 queue_delayed_work(kthrotld_workqueue, dwork, delay);
831 throtl_log(td, "schedule work. delay=%lu jiffies=%lu", 835 throtl_log(td, "schedule work. delay=%lu jiffies=%lu",
832 delay, jiffies); 836 delay, jiffies);
833 } 837 }
834} 838}
835EXPORT_SYMBOL(throtl_schedule_delayed_work);
836 839
837static void 840static void
838throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) 841throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
@@ -920,7 +923,7 @@ static void throtl_update_blkio_group_read_bps(void *key,
920 smp_mb__after_atomic_inc(); 923 smp_mb__after_atomic_inc();
921 924
922 /* Schedule a work now to process the limit change */ 925 /* Schedule a work now to process the limit change */
923 throtl_schedule_delayed_work(td->queue, 0); 926 throtl_schedule_delayed_work(td, 0);
924} 927}
925 928
926static void throtl_update_blkio_group_write_bps(void *key, 929static void throtl_update_blkio_group_write_bps(void *key,
@@ -934,7 +937,7 @@ static void throtl_update_blkio_group_write_bps(void *key,
934 smp_mb__before_atomic_inc(); 937 smp_mb__before_atomic_inc();
935 atomic_inc(&td->limits_changed); 938 atomic_inc(&td->limits_changed);
936 smp_mb__after_atomic_inc(); 939 smp_mb__after_atomic_inc();
937 throtl_schedule_delayed_work(td->queue, 0); 940 throtl_schedule_delayed_work(td, 0);
938} 941}
939 942
940static void throtl_update_blkio_group_read_iops(void *key, 943static void throtl_update_blkio_group_read_iops(void *key,
@@ -948,7 +951,7 @@ static void throtl_update_blkio_group_read_iops(void *key,
948 smp_mb__before_atomic_inc(); 951 smp_mb__before_atomic_inc();
949 atomic_inc(&td->limits_changed); 952 atomic_inc(&td->limits_changed);
950 smp_mb__after_atomic_inc(); 953 smp_mb__after_atomic_inc();
951 throtl_schedule_delayed_work(td->queue, 0); 954 throtl_schedule_delayed_work(td, 0);
952} 955}
953 956
954static void throtl_update_blkio_group_write_iops(void *key, 957static void throtl_update_blkio_group_write_iops(void *key,
@@ -962,7 +965,7 @@ static void throtl_update_blkio_group_write_iops(void *key,
962 smp_mb__before_atomic_inc(); 965 smp_mb__before_atomic_inc();
963 atomic_inc(&td->limits_changed); 966 atomic_inc(&td->limits_changed);
964 smp_mb__after_atomic_inc(); 967 smp_mb__after_atomic_inc();
965 throtl_schedule_delayed_work(td->queue, 0); 968 throtl_schedule_delayed_work(td, 0);
966} 969}
967 970
968void throtl_shutdown_timer_wq(struct request_queue *q) 971void throtl_shutdown_timer_wq(struct request_queue *q)
@@ -1135,6 +1138,10 @@ void blk_throtl_exit(struct request_queue *q)
1135 1138
1136static int __init throtl_init(void) 1139static int __init throtl_init(void)
1137{ 1140{
1141 kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
1142 if (!kthrotld_workqueue)
1143 panic("Failed to create kthrotld\n");
1144
1138 blkio_policy_register(&blkio_policy_throtl); 1145 blkio_policy_register(&blkio_policy_throtl);
1139 return 0; 1146 return 0;
1140} 1147}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 7be4c7959625..ea83a4f0c27d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3355,7 +3355,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3355 cfqd->busy_queues > 1) { 3355 cfqd->busy_queues > 1) {
3356 cfq_del_timer(cfqd, cfqq); 3356 cfq_del_timer(cfqd, cfqq);
3357 cfq_clear_cfqq_wait_request(cfqq); 3357 cfq_clear_cfqq_wait_request(cfqq);
3358 __blk_run_queue(cfqd->queue); 3358 __blk_run_queue(cfqd->queue, false);
3359 } else { 3359 } else {
3360 cfq_blkiocg_update_idle_time_stats( 3360 cfq_blkiocg_update_idle_time_stats(
3361 &cfqq->cfqg->blkg); 3361 &cfqq->cfqg->blkg);
@@ -3370,7 +3370,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3370 * this new queue is RT and the current one is BE 3370 * this new queue is RT and the current one is BE
3371 */ 3371 */
3372 cfq_preempt_queue(cfqd, cfqq); 3372 cfq_preempt_queue(cfqd, cfqq);
3373 __blk_run_queue(cfqd->queue); 3373 __blk_run_queue(cfqd->queue, false);
3374 } 3374 }
3375} 3375}
3376 3376
@@ -3731,7 +3731,7 @@ static void cfq_kick_queue(struct work_struct *work)
3731 struct request_queue *q = cfqd->queue; 3731 struct request_queue *q = cfqd->queue;
3732 3732
3733 spin_lock_irq(q->queue_lock); 3733 spin_lock_irq(q->queue_lock);
3734 __blk_run_queue(cfqd->queue); 3734 __blk_run_queue(cfqd->queue, false);
3735 spin_unlock_irq(q->queue_lock); 3735 spin_unlock_irq(q->queue_lock);
3736} 3736}
3737 3737
diff --git a/block/elevator.c b/block/elevator.c
index 2569512830d3..236e93c1f46c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -602,7 +602,7 @@ void elv_quiesce_start(struct request_queue *q)
602 */ 602 */
603 elv_drain_elevator(q); 603 elv_drain_elevator(q);
604 while (q->rq.elvpriv) { 604 while (q->rq.elvpriv) {
605 __blk_run_queue(q); 605 __blk_run_queue(q, false);
606 spin_unlock_irq(q->queue_lock); 606 spin_unlock_irq(q->queue_lock);
607 msleep(10); 607 msleep(10);
608 spin_lock_irq(q->queue_lock); 608 spin_lock_irq(q->queue_lock);
@@ -651,7 +651,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
651 * with anything. There's no point in delaying queue 651 * with anything. There's no point in delaying queue
652 * processing. 652 * processing.
653 */ 653 */
654 __blk_run_queue(q); 654 __blk_run_queue(q, false);
655 break; 655 break;
656 656
657 case ELEVATOR_INSERT_SORT: 657 case ELEVATOR_INSERT_SORT:
diff --git a/block/genhd.c b/block/genhd.c
index 6a5b772aa201..cbf1112a885c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1355,7 +1355,7 @@ int invalidate_partition(struct gendisk *disk, int partno)
1355 struct block_device *bdev = bdget_disk(disk, partno); 1355 struct block_device *bdev = bdget_disk(disk, partno);
1356 if (bdev) { 1356 if (bdev) {
1357 fsync_bdev(bdev); 1357 fsync_bdev(bdev);
1358 res = __invalidate_device(bdev); 1358 res = __invalidate_device(bdev, true);
1359 bdput(bdev); 1359 bdput(bdev);
1360 } 1360 }
1361 return res; 1361 return res;
diff --git a/block/ioctl.c b/block/ioctl.c
index 9049d460fa89..1124cd297263 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -294,9 +294,11 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
294 return -EINVAL; 294 return -EINVAL;
295 if (get_user(n, (int __user *) arg)) 295 if (get_user(n, (int __user *) arg))
296 return -EFAULT; 296 return -EFAULT;
297 if (!(mode & FMODE_EXCL) && 297 if (!(mode & FMODE_EXCL)) {
298 blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) 298 bdgrab(bdev);
299 return -EBUSY; 299 if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
300 return -EBUSY;
301 }
300 ret = set_blocksize(bdev, n); 302 ret = set_blocksize(bdev, n);
301 if (!(mode & FMODE_EXCL)) 303 if (!(mode & FMODE_EXCL))
302 blkdev_put(bdev, mode | FMODE_EXCL); 304 blkdev_put(bdev, mode | FMODE_EXCL);
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 54784bb42cec..edc25867ad9d 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -416,10 +416,15 @@ struct acpi_gpe_handler_info {
416 u8 originally_enabled; /* True if GPE was originally enabled */ 416 u8 originally_enabled; /* True if GPE was originally enabled */
417}; 417};
418 418
419struct acpi_gpe_notify_object {
420 struct acpi_namespace_node *node;
421 struct acpi_gpe_notify_object *next;
422};
423
419union acpi_gpe_dispatch_info { 424union acpi_gpe_dispatch_info {
420 struct acpi_namespace_node *method_node; /* Method node for this GPE level */ 425 struct acpi_namespace_node *method_node; /* Method node for this GPE level */
421 struct acpi_gpe_handler_info *handler; /* Installed GPE handler */ 426 struct acpi_gpe_handler_info *handler; /* Installed GPE handler */
422 struct acpi_namespace_node *device_node; /* Parent _PRW device for implicit notify */ 427 struct acpi_gpe_notify_object device; /* List of _PRW devices for implicit notify */
423}; 428};
424 429
425/* 430/*
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 14988a86066f..f4725212eb48 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -457,6 +457,7 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
457 acpi_status status; 457 acpi_status status;
458 struct acpi_gpe_event_info *local_gpe_event_info; 458 struct acpi_gpe_event_info *local_gpe_event_info;
459 struct acpi_evaluate_info *info; 459 struct acpi_evaluate_info *info;
460 struct acpi_gpe_notify_object *notify_object;
460 461
461 ACPI_FUNCTION_TRACE(ev_asynch_execute_gpe_method); 462 ACPI_FUNCTION_TRACE(ev_asynch_execute_gpe_method);
462 463
@@ -508,10 +509,18 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
508 * from this thread -- because handlers may in turn run other 509 * from this thread -- because handlers may in turn run other
509 * control methods. 510 * control methods.
510 */ 511 */
511 status = 512 status = acpi_ev_queue_notify_request(
512 acpi_ev_queue_notify_request(local_gpe_event_info->dispatch. 513 local_gpe_event_info->dispatch.device.node,
513 device_node, 514 ACPI_NOTIFY_DEVICE_WAKE);
514 ACPI_NOTIFY_DEVICE_WAKE); 515
516 notify_object = local_gpe_event_info->dispatch.device.next;
517 while (ACPI_SUCCESS(status) && notify_object) {
518 status = acpi_ev_queue_notify_request(
519 notify_object->node,
520 ACPI_NOTIFY_DEVICE_WAKE);
521 notify_object = notify_object->next;
522 }
523
515 break; 524 break;
516 525
517 case ACPI_GPE_DISPATCH_METHOD: 526 case ACPI_GPE_DISPATCH_METHOD:
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 3b20a3401b64..52aaff3df562 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -198,7 +198,9 @@ acpi_setup_gpe_for_wake(acpi_handle wake_device,
198 acpi_status status = AE_BAD_PARAMETER; 198 acpi_status status = AE_BAD_PARAMETER;
199 struct acpi_gpe_event_info *gpe_event_info; 199 struct acpi_gpe_event_info *gpe_event_info;
200 struct acpi_namespace_node *device_node; 200 struct acpi_namespace_node *device_node;
201 struct acpi_gpe_notify_object *notify_object;
201 acpi_cpu_flags flags; 202 acpi_cpu_flags flags;
203 u8 gpe_dispatch_mask;
202 204
203 ACPI_FUNCTION_TRACE(acpi_setup_gpe_for_wake); 205 ACPI_FUNCTION_TRACE(acpi_setup_gpe_for_wake);
204 206
@@ -221,27 +223,49 @@ acpi_setup_gpe_for_wake(acpi_handle wake_device,
221 goto unlock_and_exit; 223 goto unlock_and_exit;
222 } 224 }
223 225
226 if (wake_device == ACPI_ROOT_OBJECT) {
227 goto out;
228 }
229
224 /* 230 /*
225 * If there is no method or handler for this GPE, then the 231 * If there is no method or handler for this GPE, then the
226 * wake_device will be notified whenever this GPE fires (aka 232 * wake_device will be notified whenever this GPE fires (aka
227 * "implicit notify") Note: The GPE is assumed to be 233 * "implicit notify") Note: The GPE is assumed to be
228 * level-triggered (for windows compatibility). 234 * level-triggered (for windows compatibility).
229 */ 235 */
230 if (((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) == 236 gpe_dispatch_mask = gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK;
231 ACPI_GPE_DISPATCH_NONE) && (wake_device != ACPI_ROOT_OBJECT)) { 237 if (gpe_dispatch_mask != ACPI_GPE_DISPATCH_NONE
238 && gpe_dispatch_mask != ACPI_GPE_DISPATCH_NOTIFY) {
239 goto out;
240 }
232 241
233 /* Validate wake_device is of type Device */ 242 /* Validate wake_device is of type Device */
234 243
235 device_node = ACPI_CAST_PTR(struct acpi_namespace_node, 244 device_node = ACPI_CAST_PTR(struct acpi_namespace_node, wake_device);
236 wake_device); 245 if (device_node->type != ACPI_TYPE_DEVICE) {
237 if (device_node->type != ACPI_TYPE_DEVICE) { 246 goto unlock_and_exit;
238 goto unlock_and_exit; 247 }
239 } 248
249 if (gpe_dispatch_mask == ACPI_GPE_DISPATCH_NONE) {
240 gpe_event_info->flags = (ACPI_GPE_DISPATCH_NOTIFY | 250 gpe_event_info->flags = (ACPI_GPE_DISPATCH_NOTIFY |
241 ACPI_GPE_LEVEL_TRIGGERED); 251 ACPI_GPE_LEVEL_TRIGGERED);
242 gpe_event_info->dispatch.device_node = device_node; 252 gpe_event_info->dispatch.device.node = device_node;
253 gpe_event_info->dispatch.device.next = NULL;
254 } else {
255 /* There are multiple devices to notify implicitly. */
256
257 notify_object = ACPI_ALLOCATE_ZEROED(sizeof(*notify_object));
258 if (!notify_object) {
259 status = AE_NO_MEMORY;
260 goto unlock_and_exit;
261 }
262
263 notify_object->node = device_node;
264 notify_object->next = gpe_event_info->dispatch.device.next;
265 gpe_event_info->dispatch.device.next = notify_object;
243 } 266 }
244 267
268 out:
245 gpe_event_info->flags |= ACPI_GPE_CAN_WAKE; 269 gpe_event_info->flags |= ACPI_GPE_CAN_WAKE;
246 status = AE_OK; 270 status = AE_OK;
247 271
diff --git a/drivers/acpi/debugfs.c b/drivers/acpi/debugfs.c
index 5df67f1d6c61..384f7abcff77 100644
--- a/drivers/acpi/debugfs.c
+++ b/drivers/acpi/debugfs.c
@@ -26,7 +26,9 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
26 size_t count, loff_t *ppos) 26 size_t count, loff_t *ppos)
27{ 27{
28 static char *buf; 28 static char *buf;
29 static int uncopied_bytes; 29 static u32 max_size;
30 static u32 uncopied_bytes;
31
30 struct acpi_table_header table; 32 struct acpi_table_header table;
31 acpi_status status; 33 acpi_status status;
32 34
@@ -37,19 +39,24 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
37 if (copy_from_user(&table, user_buf, 39 if (copy_from_user(&table, user_buf,
38 sizeof(struct acpi_table_header))) 40 sizeof(struct acpi_table_header)))
39 return -EFAULT; 41 return -EFAULT;
40 uncopied_bytes = table.length; 42 uncopied_bytes = max_size = table.length;
41 buf = kzalloc(uncopied_bytes, GFP_KERNEL); 43 buf = kzalloc(max_size, GFP_KERNEL);
42 if (!buf) 44 if (!buf)
43 return -ENOMEM; 45 return -ENOMEM;
44 } 46 }
45 47
46 if (uncopied_bytes < count) { 48 if (buf == NULL)
47 kfree(buf); 49 return -EINVAL;
50
51 if ((*ppos > max_size) ||
52 (*ppos + count > max_size) ||
53 (*ppos + count < count) ||
54 (count > uncopied_bytes))
48 return -EINVAL; 55 return -EINVAL;
49 }
50 56
51 if (copy_from_user(buf + (*ppos), user_buf, count)) { 57 if (copy_from_user(buf + (*ppos), user_buf, count)) {
52 kfree(buf); 58 kfree(buf);
59 buf = NULL;
53 return -EFAULT; 60 return -EFAULT;
54 } 61 }
55 62
@@ -59,6 +66,7 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
59 if (!uncopied_bytes) { 66 if (!uncopied_bytes) {
60 status = acpi_install_method(buf); 67 status = acpi_install_method(buf);
61 kfree(buf); 68 kfree(buf);
69 buf = NULL;
62 if (ACPI_FAILURE(status)) 70 if (ACPI_FAILURE(status))
63 return -EINVAL; 71 return -EINVAL;
64 add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); 72 add_taint(TAINT_OVERRIDDEN_ACPI_TABLE);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 5eb25eb3ea48..3b5c3189fd99 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -274,7 +274,7 @@ acpi_table_parse_srat(enum acpi_srat_type id,
274 274
275int __init acpi_numa_init(void) 275int __init acpi_numa_init(void)
276{ 276{
277 int ret = 0; 277 int cnt = 0;
278 278
279 /* 279 /*
280 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= 280 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
@@ -288,7 +288,7 @@ int __init acpi_numa_init(void)
288 acpi_parse_x2apic_affinity, 0); 288 acpi_parse_x2apic_affinity, 0);
289 acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, 289 acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
290 acpi_parse_processor_affinity, 0); 290 acpi_parse_processor_affinity, 0);
291 ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, 291 cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
292 acpi_parse_memory_affinity, 292 acpi_parse_memory_affinity,
293 NR_NODE_MEMBLKS); 293 NR_NODE_MEMBLKS);
294 } 294 }
@@ -297,7 +297,10 @@ int __init acpi_numa_init(void)
297 acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); 297 acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
298 298
299 acpi_numa_arch_fixup(); 299 acpi_numa_arch_fixup();
300 return ret; 300
301 if (cnt <= 0)
302 return cnt ?: -ENOENT;
303 return 0;
301} 304}
302 305
303int acpi_get_pxm(acpi_handle h) 306int acpi_get_pxm(acpi_handle h)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index b9ba04fc2b34..77fc76f8aea9 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3281,7 +3281,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
3281 struct block_device *bdev = opened_bdev[cnt]; 3281 struct block_device *bdev = opened_bdev[cnt];
3282 if (!bdev || ITYPE(drive_state[cnt].fd_device) != type) 3282 if (!bdev || ITYPE(drive_state[cnt].fd_device) != type)
3283 continue; 3283 continue;
3284 __invalidate_device(bdev); 3284 __invalidate_device(bdev, true);
3285 } 3285 }
3286 mutex_unlock(&open_lock); 3286 mutex_unlock(&open_lock);
3287 } else { 3287 } else {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 49e6a545eb63..dbf31ec9114d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -78,7 +78,6 @@
78 78
79#include <asm/uaccess.h> 79#include <asm/uaccess.h>
80 80
81static DEFINE_MUTEX(loop_mutex);
82static LIST_HEAD(loop_devices); 81static LIST_HEAD(loop_devices);
83static DEFINE_MUTEX(loop_devices_mutex); 82static DEFINE_MUTEX(loop_devices_mutex);
84 83
@@ -1501,11 +1500,9 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
1501{ 1500{
1502 struct loop_device *lo = bdev->bd_disk->private_data; 1501 struct loop_device *lo = bdev->bd_disk->private_data;
1503 1502
1504 mutex_lock(&loop_mutex);
1505 mutex_lock(&lo->lo_ctl_mutex); 1503 mutex_lock(&lo->lo_ctl_mutex);
1506 lo->lo_refcnt++; 1504 lo->lo_refcnt++;
1507 mutex_unlock(&lo->lo_ctl_mutex); 1505 mutex_unlock(&lo->lo_ctl_mutex);
1508 mutex_unlock(&loop_mutex);
1509 1506
1510 return 0; 1507 return 0;
1511} 1508}
@@ -1515,7 +1512,6 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
1515 struct loop_device *lo = disk->private_data; 1512 struct loop_device *lo = disk->private_data;
1516 int err; 1513 int err;
1517 1514
1518 mutex_lock(&loop_mutex);
1519 mutex_lock(&lo->lo_ctl_mutex); 1515 mutex_lock(&lo->lo_ctl_mutex);
1520 1516
1521 if (--lo->lo_refcnt) 1517 if (--lo->lo_refcnt)
@@ -1540,7 +1536,6 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
1540out: 1536out:
1541 mutex_unlock(&lo->lo_ctl_mutex); 1537 mutex_unlock(&lo->lo_ctl_mutex);
1542out_unlocked: 1538out_unlocked:
1543 mutex_unlock(&loop_mutex);
1544 return 0; 1539 return 0;
1545} 1540}
1546 1541
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d7aa39e349a6..9cb8668ff5f4 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -120,6 +120,10 @@ static DEFINE_SPINLOCK(minor_lock);
120#define EXTENDED (1<<EXT_SHIFT) 120#define EXTENDED (1<<EXT_SHIFT)
121#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) 121#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
122#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 122#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
123#define EMULATED_HD_DISK_MINOR_OFFSET (0)
124#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
125#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
126#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
123 127
124#define DEV_NAME "xvd" /* name in /dev */ 128#define DEV_NAME "xvd" /* name in /dev */
125 129
@@ -281,7 +285,7 @@ static int blkif_queue_request(struct request *req)
281 info->shadow[id].request = req; 285 info->shadow[id].request = req;
282 286
283 ring_req->id = id; 287 ring_req->id = id;
284 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); 288 ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
285 ring_req->handle = info->handle; 289 ring_req->handle = info->handle;
286 290
287 ring_req->operation = rq_data_dir(req) ? 291 ring_req->operation = rq_data_dir(req) ?
@@ -317,7 +321,7 @@ static int blkif_queue_request(struct request *req)
317 rq_data_dir(req) ); 321 rq_data_dir(req) );
318 322
319 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); 323 info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
320 ring_req->seg[i] = 324 ring_req->u.rw.seg[i] =
321 (struct blkif_request_segment) { 325 (struct blkif_request_segment) {
322 .gref = ref, 326 .gref = ref,
323 .first_sect = fsect, 327 .first_sect = fsect,
@@ -434,6 +438,65 @@ static void xlvbd_flush(struct blkfront_info *info)
434 info->feature_flush ? "enabled" : "disabled"); 438 info->feature_flush ? "enabled" : "disabled");
435} 439}
436 440
441static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
442{
443 int major;
444 major = BLKIF_MAJOR(vdevice);
445 *minor = BLKIF_MINOR(vdevice);
446 switch (major) {
447 case XEN_IDE0_MAJOR:
448 *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
449 *minor = ((*minor / 64) * PARTS_PER_DISK) +
450 EMULATED_HD_DISK_MINOR_OFFSET;
451 break;
452 case XEN_IDE1_MAJOR:
453 *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
454 *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
455 EMULATED_HD_DISK_MINOR_OFFSET;
456 break;
457 case XEN_SCSI_DISK0_MAJOR:
458 *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
459 *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
460 break;
461 case XEN_SCSI_DISK1_MAJOR:
462 case XEN_SCSI_DISK2_MAJOR:
463 case XEN_SCSI_DISK3_MAJOR:
464 case XEN_SCSI_DISK4_MAJOR:
465 case XEN_SCSI_DISK5_MAJOR:
466 case XEN_SCSI_DISK6_MAJOR:
467 case XEN_SCSI_DISK7_MAJOR:
468 *offset = (*minor / PARTS_PER_DISK) +
469 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
470 EMULATED_SD_DISK_NAME_OFFSET;
471 *minor = *minor +
472 ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
473 EMULATED_SD_DISK_MINOR_OFFSET;
474 break;
475 case XEN_SCSI_DISK8_MAJOR:
476 case XEN_SCSI_DISK9_MAJOR:
477 case XEN_SCSI_DISK10_MAJOR:
478 case XEN_SCSI_DISK11_MAJOR:
479 case XEN_SCSI_DISK12_MAJOR:
480 case XEN_SCSI_DISK13_MAJOR:
481 case XEN_SCSI_DISK14_MAJOR:
482 case XEN_SCSI_DISK15_MAJOR:
483 *offset = (*minor / PARTS_PER_DISK) +
484 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
485 EMULATED_SD_DISK_NAME_OFFSET;
486 *minor = *minor +
487 ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
488 EMULATED_SD_DISK_MINOR_OFFSET;
489 break;
490 case XENVBD_MAJOR:
491 *offset = *minor / PARTS_PER_DISK;
492 break;
493 default:
494 printk(KERN_WARNING "blkfront: your disk configuration is "
495 "incorrect, please use an xvd device instead\n");
496 return -ENODEV;
497 }
498 return 0;
499}
437 500
438static int xlvbd_alloc_gendisk(blkif_sector_t capacity, 501static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
439 struct blkfront_info *info, 502 struct blkfront_info *info,
@@ -441,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
441{ 504{
442 struct gendisk *gd; 505 struct gendisk *gd;
443 int nr_minors = 1; 506 int nr_minors = 1;
444 int err = -ENODEV; 507 int err;
445 unsigned int offset; 508 unsigned int offset;
446 int minor; 509 int minor;
447 int nr_parts; 510 int nr_parts;
@@ -456,12 +519,20 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
456 } 519 }
457 520
458 if (!VDEV_IS_EXTENDED(info->vdevice)) { 521 if (!VDEV_IS_EXTENDED(info->vdevice)) {
459 minor = BLKIF_MINOR(info->vdevice); 522 err = xen_translate_vdev(info->vdevice, &minor, &offset);
460 nr_parts = PARTS_PER_DISK; 523 if (err)
524 return err;
525 nr_parts = PARTS_PER_DISK;
461 } else { 526 } else {
462 minor = BLKIF_MINOR_EXT(info->vdevice); 527 minor = BLKIF_MINOR_EXT(info->vdevice);
463 nr_parts = PARTS_PER_EXT_DISK; 528 nr_parts = PARTS_PER_EXT_DISK;
529 offset = minor / nr_parts;
530 if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
531 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
532 "emulated IDE disks,\n\t choose an xvd device name"
533 "from xvde on\n", info->vdevice);
464 } 534 }
535 err = -ENODEV;
465 536
466 if ((minor % nr_parts) == 0) 537 if ((minor % nr_parts) == 0)
467 nr_minors = nr_parts; 538 nr_minors = nr_parts;
@@ -475,8 +546,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
475 if (gd == NULL) 546 if (gd == NULL)
476 goto release; 547 goto release;
477 548
478 offset = minor / nr_parts;
479
480 if (nr_minors > 1) { 549 if (nr_minors > 1) {
481 if (offset < 26) 550 if (offset < 26)
482 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); 551 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
@@ -615,7 +684,7 @@ static void blkif_completion(struct blk_shadow *s)
615{ 684{
616 int i; 685 int i;
617 for (i = 0; i < s->req.nr_segments; i++) 686 for (i = 0; i < s->req.nr_segments; i++)
618 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); 687 gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
619} 688}
620 689
621static irqreturn_t blkif_interrupt(int irq, void *dev_id) 690static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -932,7 +1001,7 @@ static int blkif_recover(struct blkfront_info *info)
932 /* Rewrite any grant references invalidated by susp/resume. */ 1001 /* Rewrite any grant references invalidated by susp/resume. */
933 for (j = 0; j < req->nr_segments; j++) 1002 for (j = 0; j < req->nr_segments; j++)
934 gnttab_grant_foreign_access_ref( 1003 gnttab_grant_foreign_access_ref(
935 req->seg[j].gref, 1004 req->u.rw.seg[j].gref,
936 info->xbdev->otherend_id, 1005 info->xbdev->otherend_id,
937 pfn_to_mfn(info->shadow[req->id].frame[j]), 1006 pfn_to_mfn(info->shadow[req->id].frame[j]),
938 rq_data_dir(info->shadow[req->id].request)); 1007 rq_data_dir(info->shadow[req->id].request));
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 333c21289d97..6dcd55a74c0a 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -41,6 +41,9 @@ static struct usb_device_id ath3k_table[] = {
41 41
42 /* Atheros AR9285 Malbec with sflash firmware */ 42 /* Atheros AR9285 Malbec with sflash firmware */
43 { USB_DEVICE(0x03F0, 0x311D) }, 43 { USB_DEVICE(0x03F0, 0x311D) },
44
45 /* Atheros AR5BBU12 with sflash firmware */
46 { USB_DEVICE(0x0489, 0xE02C) },
44 { } /* Terminating entry */ 47 { } /* Terminating entry */
45}; 48};
46 49
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 4cefa91e6c34..700a3840fddc 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -105,6 +105,9 @@ static struct usb_device_id blacklist_table[] = {
105 /* Atheros AR9285 Malbec with sflash firmware */ 105 /* Atheros AR9285 Malbec with sflash firmware */
106 { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE }, 106 { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE },
107 107
108 /* Atheros AR5BBU12 with sflash firmware */
109 { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE },
110
108 /* Broadcom BCM2035 */ 111 /* Broadcom BCM2035 */
109 { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU }, 112 { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU },
110 { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU }, 113 { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU },
@@ -829,7 +832,7 @@ static void btusb_work(struct work_struct *work)
829 832
830 if (hdev->conn_hash.sco_num > 0) { 833 if (hdev->conn_hash.sco_num > 0) {
831 if (!test_bit(BTUSB_DID_ISO_RESUME, &data->flags)) { 834 if (!test_bit(BTUSB_DID_ISO_RESUME, &data->flags)) {
832 err = usb_autopm_get_interface(data->isoc); 835 err = usb_autopm_get_interface(data->isoc ? data->isoc : data->intf);
833 if (err < 0) { 836 if (err < 0) {
834 clear_bit(BTUSB_ISOC_RUNNING, &data->flags); 837 clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
835 usb_kill_anchored_urbs(&data->isoc_anchor); 838 usb_kill_anchored_urbs(&data->isoc_anchor);
@@ -858,7 +861,7 @@ static void btusb_work(struct work_struct *work)
858 861
859 __set_isoc_interface(hdev, 0); 862 __set_isoc_interface(hdev, 0);
860 if (test_and_clear_bit(BTUSB_DID_ISO_RESUME, &data->flags)) 863 if (test_and_clear_bit(BTUSB_DID_ISO_RESUME, &data->flags))
861 usb_autopm_put_interface(data->isoc); 864 usb_autopm_put_interface(data->isoc ? data->isoc : data->intf);
862 } 865 }
863} 866}
864 867
@@ -1041,8 +1044,6 @@ static int btusb_probe(struct usb_interface *intf,
1041 1044
1042 usb_set_intfdata(intf, data); 1045 usb_set_intfdata(intf, data);
1043 1046
1044 usb_enable_autosuspend(interface_to_usbdev(intf));
1045
1046 return 0; 1047 return 0;
1047} 1048}
1048 1049
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 9252e85706ef..780498d76581 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -773,18 +773,23 @@ int __init agp_amd64_init(void)
773#else 773#else
774 printk(KERN_INFO PFX "You can boot with agp=try_unsupported\n"); 774 printk(KERN_INFO PFX "You can boot with agp=try_unsupported\n");
775#endif 775#endif
776 pci_unregister_driver(&agp_amd64_pci_driver);
776 return -ENODEV; 777 return -ENODEV;
777 } 778 }
778 779
779 /* First check that we have at least one AMD64 NB */ 780 /* First check that we have at least one AMD64 NB */
780 if (!pci_dev_present(amd_nb_misc_ids)) 781 if (!pci_dev_present(amd_nb_misc_ids)) {
782 pci_unregister_driver(&agp_amd64_pci_driver);
781 return -ENODEV; 783 return -ENODEV;
784 }
782 785
783 /* Look for any AGP bridge */ 786 /* Look for any AGP bridge */
784 agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table; 787 agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table;
785 err = driver_attach(&agp_amd64_pci_driver.driver); 788 err = driver_attach(&agp_amd64_pci_driver.driver);
786 if (err == 0 && agp_bridges_found == 0) 789 if (err == 0 && agp_bridges_found == 0) {
790 pci_unregister_driver(&agp_amd64_pci_driver);
787 err = -ENODEV; 791 err = -ENODEV;
792 }
788 } 793 }
789 return err; 794 return err;
790} 795}
diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h
index c195bfeade11..5feebe2800e9 100644
--- a/drivers/char/agp/intel-agp.h
+++ b/drivers/char/agp/intel-agp.h
@@ -130,6 +130,7 @@
130#define INTEL_GMCH_GMS_STOLEN_352M (0xd << 4) 130#define INTEL_GMCH_GMS_STOLEN_352M (0xd << 4)
131 131
132#define I915_IFPADDR 0x60 132#define I915_IFPADDR 0x60
133#define I830_HIC 0x70
133 134
134/* Intel 965G registers */ 135/* Intel 965G registers */
135#define I965_MSAC 0x62 136#define I965_MSAC 0x62
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index fab3d3265adb..0d09b537bb9a 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -21,6 +21,7 @@
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/agp_backend.h> 23#include <linux/agp_backend.h>
24#include <linux/delay.h>
24#include <asm/smp.h> 25#include <asm/smp.h>
25#include "agp.h" 26#include "agp.h"
26#include "intel-agp.h" 27#include "intel-agp.h"
@@ -70,12 +71,8 @@ static struct _intel_private {
70 u32 __iomem *gtt; /* I915G */ 71 u32 __iomem *gtt; /* I915G */
71 bool clear_fake_agp; /* on first access via agp, fill with scratch */ 72 bool clear_fake_agp; /* on first access via agp, fill with scratch */
72 int num_dcache_entries; 73 int num_dcache_entries;
73 union { 74 void __iomem *i9xx_flush_page;
74 void __iomem *i9xx_flush_page;
75 void *i8xx_flush_page;
76 };
77 char *i81x_gtt_table; 75 char *i81x_gtt_table;
78 struct page *i8xx_page;
79 struct resource ifp_resource; 76 struct resource ifp_resource;
80 int resource_valid; 77 int resource_valid;
81 struct page *scratch_page; 78 struct page *scratch_page;
@@ -722,28 +719,6 @@ static int intel_fake_agp_fetch_size(void)
722 719
723static void i830_cleanup(void) 720static void i830_cleanup(void)
724{ 721{
725 if (intel_private.i8xx_flush_page) {
726 kunmap(intel_private.i8xx_flush_page);
727 intel_private.i8xx_flush_page = NULL;
728 }
729
730 __free_page(intel_private.i8xx_page);
731 intel_private.i8xx_page = NULL;
732}
733
734static void intel_i830_setup_flush(void)
735{
736 /* return if we've already set the flush mechanism up */
737 if (intel_private.i8xx_page)
738 return;
739
740 intel_private.i8xx_page = alloc_page(GFP_KERNEL);
741 if (!intel_private.i8xx_page)
742 return;
743
744 intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
745 if (!intel_private.i8xx_flush_page)
746 i830_cleanup();
747} 722}
748 723
749/* The chipset_flush interface needs to get data that has already been 724/* The chipset_flush interface needs to get data that has already been
@@ -758,14 +733,27 @@ static void intel_i830_setup_flush(void)
758 */ 733 */
759static void i830_chipset_flush(void) 734static void i830_chipset_flush(void)
760{ 735{
761 unsigned int *pg = intel_private.i8xx_flush_page; 736 unsigned long timeout = jiffies + msecs_to_jiffies(1000);
737
738 /* Forcibly evict everything from the CPU write buffers.
739 * clflush appears to be insufficient.
740 */
741 wbinvd_on_all_cpus();
742
743 /* Now we've only seen documents for this magic bit on 855GM,
744 * we hope it exists for the other gen2 chipsets...
745 *
746 * Also works as advertised on my 845G.
747 */
748 writel(readl(intel_private.registers+I830_HIC) | (1<<31),
749 intel_private.registers+I830_HIC);
762 750
763 memset(pg, 0, 1024); 751 while (readl(intel_private.registers+I830_HIC) & (1<<31)) {
752 if (time_after(jiffies, timeout))
753 break;
764 754
765 if (cpu_has_clflush) 755 udelay(50);
766 clflush_cache_range(pg, 1024); 756 }
767 else if (wbinvd_on_all_cpus() != 0)
768 printk(KERN_ERR "Timed out waiting for cache flush.\n");
769} 757}
770 758
771static void i830_write_entry(dma_addr_t addr, unsigned int entry, 759static void i830_write_entry(dma_addr_t addr, unsigned int entry,
@@ -849,8 +837,6 @@ static int i830_setup(void)
849 837
850 intel_private.gtt_bus_addr = reg_addr + I810_PTE_BASE; 838 intel_private.gtt_bus_addr = reg_addr + I810_PTE_BASE;
851 839
852 intel_i830_setup_flush();
853
854 return 0; 840 return 0;
855} 841}
856 842
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 7855f9f45b8e..62787e30d508 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -900,6 +900,14 @@ static void sender(void *send_info,
900 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec); 900 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
901#endif 901#endif
902 902
903 /*
904 * last_timeout_jiffies is updated here to avoid
905 * smi_timeout() handler passing very large time_diff
906 * value to smi_event_handler() that causes
907 * the send command to abort.
908 */
909 smi_info->last_timeout_jiffies = jiffies;
910
903 mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES); 911 mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES);
904 912
905 if (smi_info->thread) 913 if (smi_info->thread)
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index e6d75627c6c8..33dc2298af73 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -53,6 +53,8 @@ MODULE_LICENSE("GPL");
53 53
54#define RTC_BITS 55 /* 55 bits for this implementation */ 54#define RTC_BITS 55 /* 55 bits for this implementation */
55 55
56static struct k_clock sgi_clock;
57
56extern unsigned long sn_rtc_cycles_per_second; 58extern unsigned long sn_rtc_cycles_per_second;
57 59
58#define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC)) 60#define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC))
@@ -487,7 +489,7 @@ static int sgi_clock_get(clockid_t clockid, struct timespec *tp)
487 return 0; 489 return 0;
488}; 490};
489 491
490static int sgi_clock_set(clockid_t clockid, struct timespec *tp) 492static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp)
491{ 493{
492 494
493 u64 nsec; 495 u64 nsec;
@@ -763,15 +765,21 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
763 return err; 765 return err;
764} 766}
765 767
768static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp)
769{
770 tp->tv_sec = 0;
771 tp->tv_nsec = sgi_clock_period;
772 return 0;
773}
774
766static struct k_clock sgi_clock = { 775static struct k_clock sgi_clock = {
767 .res = 0, 776 .clock_set = sgi_clock_set,
768 .clock_set = sgi_clock_set, 777 .clock_get = sgi_clock_get,
769 .clock_get = sgi_clock_get, 778 .clock_getres = sgi_clock_getres,
770 .timer_create = sgi_timer_create, 779 .timer_create = sgi_timer_create,
771 .nsleep = do_posix_clock_nonanosleep, 780 .timer_set = sgi_timer_set,
772 .timer_set = sgi_timer_set, 781 .timer_del = sgi_timer_del,
773 .timer_del = sgi_timer_del, 782 .timer_get = sgi_timer_get
774 .timer_get = sgi_timer_get
775}; 783};
776 784
777/** 785/**
@@ -831,8 +839,8 @@ static int __init mmtimer_init(void)
831 (unsigned long) node); 839 (unsigned long) node);
832 } 840 }
833 841
834 sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second; 842 sgi_clock_period = NSEC_PER_SEC / sn_rtc_cycles_per_second;
835 register_posix_clock(CLOCK_SGI_CYCLE, &sgi_clock); 843 posix_timers_register_clock(CLOCK_SGI_CYCLE, &sgi_clock);
836 844
837 printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION, 845 printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION,
838 sn_rtc_cycles_per_second/(unsigned long)1E6); 846 sn_rtc_cycles_per_second/(unsigned long)1E6);
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 777181a2e603..bcbbc71febb7 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -830,8 +830,7 @@ static void monitor_card(unsigned long p)
830 test_bit(IS_ANY_T1, &dev->flags))) { 830 test_bit(IS_ANY_T1, &dev->flags))) {
831 DEBUGP(4, dev, "Perform AUTOPPS\n"); 831 DEBUGP(4, dev, "Perform AUTOPPS\n");
832 set_bit(IS_AUTOPPS_ACT, &dev->flags); 832 set_bit(IS_AUTOPPS_ACT, &dev->flags);
833 ptsreq.protocol = ptsreq.protocol = 833 ptsreq.protocol = (0x01 << dev->proto);
834 (0x01 << dev->proto);
835 ptsreq.flags = 0x01; 834 ptsreq.flags = 0x01;
836 ptsreq.pts1 = 0x00; 835 ptsreq.pts1 = 0x00;
837 ptsreq.pts2 = 0x00; 836 ptsreq.pts2 = 0x00;
diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 94b8eb4d691d..444155a305ae 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -78,7 +78,6 @@ static void signalled_reboot_callback(void *callback_data)
78static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data) 78static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
79{ 79{
80 struct ipw_dev *ipw = priv_data; 80 struct ipw_dev *ipw = priv_data;
81 struct resource *io_resource;
82 int ret; 81 int ret;
83 82
84 p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH; 83 p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
@@ -92,9 +91,12 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
92 if (ret) 91 if (ret)
93 return ret; 92 return ret;
94 93
95 io_resource = request_region(p_dev->resource[0]->start, 94 if (!request_region(p_dev->resource[0]->start,
96 resource_size(p_dev->resource[0]), 95 resource_size(p_dev->resource[0]),
97 IPWIRELESS_PCCARD_NAME); 96 IPWIRELESS_PCCARD_NAME)) {
97 ret = -EBUSY;
98 goto exit;
99 }
98 100
99 p_dev->resource[2]->flags |= 101 p_dev->resource[2]->flags |=
100 WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE; 102 WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE;
@@ -105,22 +107,25 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
105 107
106 ret = pcmcia_map_mem_page(p_dev, p_dev->resource[2], p_dev->card_addr); 108 ret = pcmcia_map_mem_page(p_dev, p_dev->resource[2], p_dev->card_addr);
107 if (ret != 0) 109 if (ret != 0)
108 goto exit2; 110 goto exit1;
109 111
110 ipw->is_v2_card = resource_size(p_dev->resource[2]) == 0x100; 112 ipw->is_v2_card = resource_size(p_dev->resource[2]) == 0x100;
111 113
112 ipw->attr_memory = ioremap(p_dev->resource[2]->start, 114 ipw->common_memory = ioremap(p_dev->resource[2]->start,
113 resource_size(p_dev->resource[2])); 115 resource_size(p_dev->resource[2]));
114 request_mem_region(p_dev->resource[2]->start, 116 if (!request_mem_region(p_dev->resource[2]->start,
115 resource_size(p_dev->resource[2]), 117 resource_size(p_dev->resource[2]),
116 IPWIRELESS_PCCARD_NAME); 118 IPWIRELESS_PCCARD_NAME)) {
119 ret = -EBUSY;
120 goto exit2;
121 }
117 122
118 p_dev->resource[3]->flags |= WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM | 123 p_dev->resource[3]->flags |= WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM |
119 WIN_ENABLE; 124 WIN_ENABLE;
120 p_dev->resource[3]->end = 0; /* this used to be 0x1000 */ 125 p_dev->resource[3]->end = 0; /* this used to be 0x1000 */
121 ret = pcmcia_request_window(p_dev, p_dev->resource[3], 0); 126 ret = pcmcia_request_window(p_dev, p_dev->resource[3], 0);
122 if (ret != 0) 127 if (ret != 0)
123 goto exit2; 128 goto exit3;
124 129
125 ret = pcmcia_map_mem_page(p_dev, p_dev->resource[3], 0); 130 ret = pcmcia_map_mem_page(p_dev, p_dev->resource[3], 0);
126 if (ret != 0) 131 if (ret != 0)
@@ -128,23 +133,28 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
128 133
129 ipw->attr_memory = ioremap(p_dev->resource[3]->start, 134 ipw->attr_memory = ioremap(p_dev->resource[3]->start,
130 resource_size(p_dev->resource[3])); 135 resource_size(p_dev->resource[3]));
131 request_mem_region(p_dev->resource[3]->start, 136 if (!request_mem_region(p_dev->resource[3]->start,
132 resource_size(p_dev->resource[3]), 137 resource_size(p_dev->resource[3]),
133 IPWIRELESS_PCCARD_NAME); 138 IPWIRELESS_PCCARD_NAME)) {
139 ret = -EBUSY;
140 goto exit4;
141 }
134 142
135 return 0; 143 return 0;
136 144
145exit4:
146 iounmap(ipw->attr_memory);
137exit3: 147exit3:
148 release_mem_region(p_dev->resource[2]->start,
149 resource_size(p_dev->resource[2]));
138exit2: 150exit2:
139 if (ipw->common_memory) { 151 iounmap(ipw->common_memory);
140 release_mem_region(p_dev->resource[2]->start,
141 resource_size(p_dev->resource[2]));
142 iounmap(ipw->common_memory);
143 }
144exit1: 152exit1:
145 release_resource(io_resource); 153 release_region(p_dev->resource[0]->start,
154 resource_size(p_dev->resource[0]));
155exit:
146 pcmcia_disable_device(p_dev); 156 pcmcia_disable_device(p_dev);
147 return -1; 157 return ret;
148} 158}
149 159
150static int config_ipwireless(struct ipw_dev *ipw) 160static int config_ipwireless(struct ipw_dev *ipw)
@@ -219,6 +229,8 @@ exit:
219 229
220static void release_ipwireless(struct ipw_dev *ipw) 230static void release_ipwireless(struct ipw_dev *ipw)
221{ 231{
232 release_region(ipw->link->resource[0]->start,
233 resource_size(ipw->link->resource[0]));
222 if (ipw->common_memory) { 234 if (ipw->common_memory) {
223 release_mem_region(ipw->link->resource[2]->start, 235 release_mem_region(ipw->link->resource[2]->start,
224 resource_size(ipw->link->resource[2])); 236 resource_size(ipw->link->resource[2]));
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index 36e0fa161c2b..1f46f1cd9225 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -364,14 +364,12 @@ unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip,
364 tpm_protected_ordinal_duration[ordinal & 364 tpm_protected_ordinal_duration[ordinal &
365 TPM_PROTECTED_ORDINAL_MASK]; 365 TPM_PROTECTED_ORDINAL_MASK];
366 366
367 if (duration_idx != TPM_UNDEFINED) { 367 if (duration_idx != TPM_UNDEFINED)
368 duration = chip->vendor.duration[duration_idx]; 368 duration = chip->vendor.duration[duration_idx];
369 /* if duration is 0, it's because chip->vendor.duration wasn't */ 369 if (duration <= 0)
370 /* filled yet, so we set the lowest timeout just to give enough */
371 /* time for tpm_get_timeouts() to succeed */
372 return (duration <= 0 ? HZ : duration);
373 } else
374 return 2 * 60 * HZ; 370 return 2 * 60 * HZ;
371 else
372 return duration;
375} 373}
376EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration); 374EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
377 375
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 490393186338..84b164d1eb2b 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -388,6 +388,10 @@ static void discard_port_data(struct port *port)
388 unsigned int len; 388 unsigned int len;
389 int ret; 389 int ret;
390 390
391 if (!port->portdev) {
392 /* Device has been unplugged. vqs are already gone. */
393 return;
394 }
391 vq = port->in_vq; 395 vq = port->in_vq;
392 if (port->inbuf) 396 if (port->inbuf)
393 buf = port->inbuf; 397 buf = port->inbuf;
@@ -470,6 +474,10 @@ static void reclaim_consumed_buffers(struct port *port)
470 void *buf; 474 void *buf;
471 unsigned int len; 475 unsigned int len;
472 476
477 if (!port->portdev) {
478 /* Device has been unplugged. vqs are already gone. */
479 return;
480 }
473 while ((buf = virtqueue_get_buf(port->out_vq, &len))) { 481 while ((buf = virtqueue_get_buf(port->out_vq, &len))) {
474 kfree(buf); 482 kfree(buf);
475 port->outvq_full = false; 483 port->outvq_full = false;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1109f6848a43..5cb4d09919d6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1919,8 +1919,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1919 1919
1920 ret = sysdev_driver_register(&cpu_sysdev_class, 1920 ret = sysdev_driver_register(&cpu_sysdev_class,
1921 &cpufreq_sysdev_driver); 1921 &cpufreq_sysdev_driver);
1922 if (ret)
1923 goto err_null_driver;
1922 1924
1923 if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) { 1925 if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1924 int i; 1926 int i;
1925 ret = -ENODEV; 1927 ret = -ENODEV;
1926 1928
@@ -1935,21 +1937,22 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1935 if (ret) { 1937 if (ret) {
1936 dprintk("no CPU initialized for driver %s\n", 1938 dprintk("no CPU initialized for driver %s\n",
1937 driver_data->name); 1939 driver_data->name);
1938 sysdev_driver_unregister(&cpu_sysdev_class, 1940 goto err_sysdev_unreg;
1939 &cpufreq_sysdev_driver);
1940
1941 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1942 cpufreq_driver = NULL;
1943 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1944 } 1941 }
1945 } 1942 }
1946 1943
1947 if (!ret) { 1944 register_hotcpu_notifier(&cpufreq_cpu_notifier);
1948 register_hotcpu_notifier(&cpufreq_cpu_notifier); 1945 dprintk("driver %s up and running\n", driver_data->name);
1949 dprintk("driver %s up and running\n", driver_data->name); 1946 cpufreq_debug_enable_ratelimit();
1950 cpufreq_debug_enable_ratelimit();
1951 }
1952 1947
1948 return 0;
1949err_sysdev_unreg:
1950 sysdev_driver_unregister(&cpu_sysdev_class,
1951 &cpufreq_sysdev_driver);
1952err_null_driver:
1953 spin_lock_irqsave(&cpufreq_driver_lock, flags);
1954 cpufreq_driver = NULL;
1955 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1953 return ret; 1956 return ret;
1954} 1957}
1955EXPORT_SYMBOL_GPL(cpufreq_register_driver); 1958EXPORT_SYMBOL_GPL(cpufreq_register_driver);
diff --git a/drivers/gpio/ml_ioh_gpio.c b/drivers/gpio/ml_ioh_gpio.c
index cead8e6ff345..7f6f01a4b145 100644
--- a/drivers/gpio/ml_ioh_gpio.c
+++ b/drivers/gpio/ml_ioh_gpio.c
@@ -326,6 +326,7 @@ static DEFINE_PCI_DEVICE_TABLE(ioh_gpio_pcidev_id) = {
326 { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) }, 326 { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) },
327 { 0, } 327 { 0, }
328}; 328};
329MODULE_DEVICE_TABLE(pci, ioh_gpio_pcidev_id);
329 330
330static struct pci_driver ioh_gpio_driver = { 331static struct pci_driver ioh_gpio_driver = {
331 .name = "ml_ioh_gpio", 332 .name = "ml_ioh_gpio",
diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c
index 0eba0a75c804..2c6af8705103 100644
--- a/drivers/gpio/pch_gpio.c
+++ b/drivers/gpio/pch_gpio.c
@@ -286,6 +286,7 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = {
286 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) }, 286 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) },
287 { 0, } 287 { 0, }
288}; 288};
289MODULE_DEVICE_TABLE(pci, pch_gpio_pcidev_id);
289 290
290static struct pci_driver pch_gpio_driver = { 291static struct pci_driver pch_gpio_driver = {
291 .name = "pch_gpio", 292 .name = "pch_gpio",
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 6977a1ce9d98..f73ef4390db6 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -672,7 +672,7 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info)
672 struct drm_crtc_helper_funcs *crtc_funcs; 672 struct drm_crtc_helper_funcs *crtc_funcs;
673 u16 *red, *green, *blue, *transp; 673 u16 *red, *green, *blue, *transp;
674 struct drm_crtc *crtc; 674 struct drm_crtc *crtc;
675 int i, rc = 0; 675 int i, j, rc = 0;
676 int start; 676 int start;
677 677
678 for (i = 0; i < fb_helper->crtc_count; i++) { 678 for (i = 0; i < fb_helper->crtc_count; i++) {
@@ -685,7 +685,7 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info)
685 transp = cmap->transp; 685 transp = cmap->transp;
686 start = cmap->start; 686 start = cmap->start;
687 687
688 for (i = 0; i < cmap->len; i++) { 688 for (j = 0; j < cmap->len; j++) {
689 u16 hred, hgreen, hblue, htransp = 0xffff; 689 u16 hred, hgreen, hblue, htransp = 0xffff;
690 690
691 hred = *red++; 691 hred = *red++;
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 3dadfa2a8528..28d1d3c24d65 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -164,8 +164,10 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
164 * available. In that case we can't account for this and just 164 * available. In that case we can't account for this and just
165 * hope for the best. 165 * hope for the best.
166 */ 166 */
167 if ((vblrc > 0) && (abs(diff_ns) > 1000000)) 167 if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) {
168 atomic_inc(&dev->_vblank_count[crtc]); 168 atomic_inc(&dev->_vblank_count[crtc]);
169 smp_mb__after_atomic_inc();
170 }
169 171
170 /* Invalidate all timestamps while vblank irq's are off. */ 172 /* Invalidate all timestamps while vblank irq's are off. */
171 clear_vblank_timestamps(dev, crtc); 173 clear_vblank_timestamps(dev, crtc);
@@ -491,6 +493,12 @@ void drm_calc_timestamping_constants(struct drm_crtc *crtc)
491 /* Dot clock in Hz: */ 493 /* Dot clock in Hz: */
492 dotclock = (u64) crtc->hwmode.clock * 1000; 494 dotclock = (u64) crtc->hwmode.clock * 1000;
493 495
496 /* Fields of interlaced scanout modes are only halve a frame duration.
497 * Double the dotclock to get halve the frame-/line-/pixelduration.
498 */
499 if (crtc->hwmode.flags & DRM_MODE_FLAG_INTERLACE)
500 dotclock *= 2;
501
494 /* Valid dotclock? */ 502 /* Valid dotclock? */
495 if (dotclock > 0) { 503 if (dotclock > 0) {
496 /* Convert scanline length in pixels and video dot clock to 504 /* Convert scanline length in pixels and video dot clock to
@@ -603,14 +611,6 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc,
603 return -EAGAIN; 611 return -EAGAIN;
604 } 612 }
605 613
606 /* Don't know yet how to handle interlaced or
607 * double scan modes. Just no-op for now.
608 */
609 if (mode->flags & (DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLSCAN)) {
610 DRM_DEBUG("crtc %d: Noop due to unsupported mode.\n", crtc);
611 return -ENOTSUPP;
612 }
613
614 /* Get current scanout position with system timestamp. 614 /* Get current scanout position with system timestamp.
615 * Repeat query up to DRM_TIMESTAMP_MAXRETRIES times 615 * Repeat query up to DRM_TIMESTAMP_MAXRETRIES times
616 * if single query takes longer than max_error nanoseconds. 616 * if single query takes longer than max_error nanoseconds.
@@ -858,10 +858,11 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
858 if (rc) { 858 if (rc) {
859 tslot = atomic_read(&dev->_vblank_count[crtc]) + diff; 859 tslot = atomic_read(&dev->_vblank_count[crtc]) + diff;
860 vblanktimestamp(dev, crtc, tslot) = t_vblank; 860 vblanktimestamp(dev, crtc, tslot) = t_vblank;
861 smp_wmb();
862 } 861 }
863 862
863 smp_mb__before_atomic_inc();
864 atomic_add(diff, &dev->_vblank_count[crtc]); 864 atomic_add(diff, &dev->_vblank_count[crtc]);
865 smp_mb__after_atomic_inc();
865} 866}
866 867
867/** 868/**
@@ -1011,7 +1012,8 @@ int drm_modeset_ctl(struct drm_device *dev, void *data,
1011 struct drm_file *file_priv) 1012 struct drm_file *file_priv)
1012{ 1013{
1013 struct drm_modeset_ctl *modeset = data; 1014 struct drm_modeset_ctl *modeset = data;
1014 int crtc, ret = 0; 1015 int ret = 0;
1016 unsigned int crtc;
1015 1017
1016 /* If drm_vblank_init() hasn't been called yet, just no-op */ 1018 /* If drm_vblank_init() hasn't been called yet, just no-op */
1017 if (!dev->num_crtcs) 1019 if (!dev->num_crtcs)
@@ -1293,15 +1295,16 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc)
1293 * e.g., due to spurious vblank interrupts. We need to 1295 * e.g., due to spurious vblank interrupts. We need to
1294 * ignore those for accounting. 1296 * ignore those for accounting.
1295 */ 1297 */
1296 if (abs(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) { 1298 if (abs64(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) {
1297 /* Store new timestamp in ringbuffer. */ 1299 /* Store new timestamp in ringbuffer. */
1298 vblanktimestamp(dev, crtc, vblcount + 1) = tvblank; 1300 vblanktimestamp(dev, crtc, vblcount + 1) = tvblank;
1299 smp_wmb();
1300 1301
1301 /* Increment cooked vblank count. This also atomically commits 1302 /* Increment cooked vblank count. This also atomically commits
1302 * the timestamp computed above. 1303 * the timestamp computed above.
1303 */ 1304 */
1305 smp_mb__before_atomic_inc();
1304 atomic_inc(&dev->_vblank_count[crtc]); 1306 atomic_inc(&dev->_vblank_count[crtc]);
1307 smp_mb__after_atomic_inc();
1305 } else { 1308 } else {
1306 DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n", 1309 DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n",
1307 crtc, (int) diff_ns); 1310 crtc, (int) diff_ns);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3601466c5502..4ff9b6cc973f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -865,7 +865,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
865 int max_freq; 865 int max_freq;
866 866
867 /* RPSTAT1 is in the GT power well */ 867 /* RPSTAT1 is in the GT power well */
868 __gen6_force_wake_get(dev_priv); 868 __gen6_gt_force_wake_get(dev_priv);
869 869
870 seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status); 870 seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
871 seq_printf(m, "RPSTAT1: 0x%08x\n", I915_READ(GEN6_RPSTAT1)); 871 seq_printf(m, "RPSTAT1: 0x%08x\n", I915_READ(GEN6_RPSTAT1));
@@ -888,7 +888,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
888 seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", 888 seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
889 max_freq * 100); 889 max_freq * 100);
890 890
891 __gen6_force_wake_put(dev_priv); 891 __gen6_gt_force_wake_put(dev_priv);
892 } else { 892 } else {
893 seq_printf(m, "no P-state info available\n"); 893 seq_printf(m, "no P-state info available\n");
894 } 894 }
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 17bd766f2081..e33d9be7df3b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1895,6 +1895,17 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
1895 if (IS_GEN2(dev)) 1895 if (IS_GEN2(dev))
1896 dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30)); 1896 dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30));
1897 1897
1898 /* 965GM sometimes incorrectly writes to hardware status page (HWS)
1899 * using 32bit addressing, overwriting memory if HWS is located
1900 * above 4GB.
1901 *
1902 * The documentation also mentions an issue with undefined
1903 * behaviour if any general state is accessed within a page above 4GB,
1904 * which also needs to be handled carefully.
1905 */
1906 if (IS_BROADWATER(dev) || IS_CRESTLINE(dev))
1907 dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(32));
1908
1898 mmio_bar = IS_GEN2(dev) ? 1 : 0; 1909 mmio_bar = IS_GEN2(dev) ? 1 : 0;
1899 dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, 0); 1910 dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, 0);
1900 if (!dev_priv->regs) { 1911 if (!dev_priv->regs) {
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0ad533f06af9..22ec066adae6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -46,6 +46,9 @@ module_param_named(fbpercrtc, i915_fbpercrtc, int, 0400);
46unsigned int i915_powersave = 1; 46unsigned int i915_powersave = 1;
47module_param_named(powersave, i915_powersave, int, 0600); 47module_param_named(powersave, i915_powersave, int, 0600);
48 48
49unsigned int i915_semaphores = 0;
50module_param_named(semaphores, i915_semaphores, int, 0600);
51
49unsigned int i915_enable_rc6 = 0; 52unsigned int i915_enable_rc6 = 0;
50module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600); 53module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600);
51 54
@@ -254,7 +257,7 @@ void intel_detect_pch (struct drm_device *dev)
254 } 257 }
255} 258}
256 259
257void __gen6_force_wake_get(struct drm_i915_private *dev_priv) 260void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
258{ 261{
259 int count; 262 int count;
260 263
@@ -270,12 +273,22 @@ void __gen6_force_wake_get(struct drm_i915_private *dev_priv)
270 udelay(10); 273 udelay(10);
271} 274}
272 275
273void __gen6_force_wake_put(struct drm_i915_private *dev_priv) 276void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
274{ 277{
275 I915_WRITE_NOTRACE(FORCEWAKE, 0); 278 I915_WRITE_NOTRACE(FORCEWAKE, 0);
276 POSTING_READ(FORCEWAKE); 279 POSTING_READ(FORCEWAKE);
277} 280}
278 281
282void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
283{
284 int loop = 500;
285 u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
286 while (fifo < 20 && loop--) {
287 udelay(10);
288 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
289 }
290}
291
279static int i915_drm_freeze(struct drm_device *dev) 292static int i915_drm_freeze(struct drm_device *dev)
280{ 293{
281 struct drm_i915_private *dev_priv = dev->dev_private; 294 struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 65dfe81d0035..456f40484838 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -956,6 +956,7 @@ extern struct drm_ioctl_desc i915_ioctls[];
956extern int i915_max_ioctl; 956extern int i915_max_ioctl;
957extern unsigned int i915_fbpercrtc; 957extern unsigned int i915_fbpercrtc;
958extern unsigned int i915_powersave; 958extern unsigned int i915_powersave;
959extern unsigned int i915_semaphores;
959extern unsigned int i915_lvds_downclock; 960extern unsigned int i915_lvds_downclock;
960extern unsigned int i915_panel_use_ssc; 961extern unsigned int i915_panel_use_ssc;
961extern unsigned int i915_enable_rc6; 962extern unsigned int i915_enable_rc6;
@@ -1177,6 +1178,9 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
1177void i915_gem_free_all_phys_object(struct drm_device *dev); 1178void i915_gem_free_all_phys_object(struct drm_device *dev);
1178void i915_gem_release(struct drm_device *dev, struct drm_file *file); 1179void i915_gem_release(struct drm_device *dev, struct drm_file *file);
1179 1180
1181uint32_t
1182i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
1183
1180/* i915_gem_gtt.c */ 1184/* i915_gem_gtt.c */
1181void i915_gem_restore_gtt_mappings(struct drm_device *dev); 1185void i915_gem_restore_gtt_mappings(struct drm_device *dev);
1182int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj); 1186int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
@@ -1353,22 +1357,32 @@ __i915_write(64, q)
1353 * must be set to prevent GT core from power down and stale values being 1357 * must be set to prevent GT core from power down and stale values being
1354 * returned. 1358 * returned.
1355 */ 1359 */
1356void __gen6_force_wake_get(struct drm_i915_private *dev_priv); 1360void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
1357void __gen6_force_wake_put (struct drm_i915_private *dev_priv); 1361void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
1358static inline u32 i915_safe_read(struct drm_i915_private *dev_priv, u32 reg) 1362void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
1363
1364static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
1359{ 1365{
1360 u32 val; 1366 u32 val;
1361 1367
1362 if (dev_priv->info->gen >= 6) { 1368 if (dev_priv->info->gen >= 6) {
1363 __gen6_force_wake_get(dev_priv); 1369 __gen6_gt_force_wake_get(dev_priv);
1364 val = I915_READ(reg); 1370 val = I915_READ(reg);
1365 __gen6_force_wake_put(dev_priv); 1371 __gen6_gt_force_wake_put(dev_priv);
1366 } else 1372 } else
1367 val = I915_READ(reg); 1373 val = I915_READ(reg);
1368 1374
1369 return val; 1375 return val;
1370} 1376}
1371 1377
1378static inline void i915_gt_write(struct drm_i915_private *dev_priv,
1379 u32 reg, u32 val)
1380{
1381 if (dev_priv->info->gen >= 6)
1382 __gen6_gt_wait_for_fifo(dev_priv);
1383 I915_WRITE(reg, val);
1384}
1385
1372static inline void 1386static inline void
1373i915_write(struct drm_i915_private *dev_priv, u32 reg, u64 val, int len) 1387i915_write(struct drm_i915_private *dev_priv, u32 reg, u64 val, int len)
1374{ 1388{
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cf4f74c7c6fb..36e66cc5225e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1398,7 +1398,7 @@ i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
1398 * Return the required GTT alignment for an object, only taking into account 1398 * Return the required GTT alignment for an object, only taking into account
1399 * unfenced tiled surface requirements. 1399 * unfenced tiled surface requirements.
1400 */ 1400 */
1401static uint32_t 1401uint32_t
1402i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj) 1402i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
1403{ 1403{
1404 struct drm_device *dev = obj->base.dev; 1404 struct drm_device *dev = obj->base.dev;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d2f445e825f2..50ab1614571c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -772,8 +772,8 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
772 if (from == NULL || to == from) 772 if (from == NULL || to == from)
773 return 0; 773 return 0;
774 774
775 /* XXX gpu semaphores are currently causing hard hangs on SNB mobile */ 775 /* XXX gpu semaphores are implicated in various hard hangs on SNB */
776 if (INTEL_INFO(obj->base.dev)->gen < 6 || IS_MOBILE(obj->base.dev)) 776 if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
777 return i915_gem_object_wait_rendering(obj, true); 777 return i915_gem_object_wait_rendering(obj, true);
778 778
779 idx = intel_ring_sync_index(from, to); 779 idx = intel_ring_sync_index(from, to);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 22a32b9932c5..d64843e18df2 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -349,14 +349,27 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
349 (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && 349 (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end &&
350 i915_gem_object_fence_ok(obj, args->tiling_mode)); 350 i915_gem_object_fence_ok(obj, args->tiling_mode));
351 351
352 obj->tiling_changed = true; 352 /* Rebind if we need a change of alignment */
353 obj->tiling_mode = args->tiling_mode; 353 if (!obj->map_and_fenceable) {
354 obj->stride = args->stride; 354 u32 unfenced_alignment =
355 i915_gem_get_unfenced_gtt_alignment(obj);
356 if (obj->gtt_offset & (unfenced_alignment - 1))
357 ret = i915_gem_object_unbind(obj);
358 }
359
360 if (ret == 0) {
361 obj->tiling_changed = true;
362 obj->tiling_mode = args->tiling_mode;
363 obj->stride = args->stride;
364 }
355 } 365 }
366 /* we have to maintain this existing ABI... */
367 args->stride = obj->stride;
368 args->tiling_mode = obj->tiling_mode;
356 drm_gem_object_unreference(&obj->base); 369 drm_gem_object_unreference(&obj->base);
357 mutex_unlock(&dev->struct_mutex); 370 mutex_unlock(&dev->struct_mutex);
358 371
359 return 0; 372 return ret;
360} 373}
361 374
362/** 375/**
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 97f946dcc1aa..8a9e08bf1cf7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -316,6 +316,8 @@ static void i915_hotplug_work_func(struct work_struct *work)
316 struct drm_mode_config *mode_config = &dev->mode_config; 316 struct drm_mode_config *mode_config = &dev->mode_config;
317 struct intel_encoder *encoder; 317 struct intel_encoder *encoder;
318 318
319 DRM_DEBUG_KMS("running encoder hotplug functions\n");
320
319 list_for_each_entry(encoder, &mode_config->encoder_list, base.head) 321 list_for_each_entry(encoder, &mode_config->encoder_list, base.head)
320 if (encoder->hot_plug) 322 if (encoder->hot_plug)
321 encoder->hot_plug(encoder); 323 encoder->hot_plug(encoder);
@@ -1649,9 +1651,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
1649 } else { 1651 } else {
1650 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG | 1652 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG |
1651 SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG; 1653 SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG;
1652 hotplug_mask |= SDE_AUX_MASK | SDE_FDI_MASK | SDE_TRANS_MASK; 1654 hotplug_mask |= SDE_AUX_MASK;
1653 I915_WRITE(FDI_RXA_IMR, 0);
1654 I915_WRITE(FDI_RXB_IMR, 0);
1655 } 1655 }
1656 1656
1657 dev_priv->pch_irq_mask = ~hotplug_mask; 1657 dev_priv->pch_irq_mask = ~hotplug_mask;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 729d4233b763..2abe240dae58 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1553,7 +1553,17 @@
1553 1553
1554/* Backlight control */ 1554/* Backlight control */
1555#define BLC_PWM_CTL 0x61254 1555#define BLC_PWM_CTL 0x61254
1556#define BACKLIGHT_MODULATION_FREQ_SHIFT (17)
1556#define BLC_PWM_CTL2 0x61250 /* 965+ only */ 1557#define BLC_PWM_CTL2 0x61250 /* 965+ only */
1558#define BLM_COMBINATION_MODE (1 << 30)
1559/*
1560 * This is the most significant 15 bits of the number of backlight cycles in a
1561 * complete cycle of the modulated backlight control.
1562 *
1563 * The actual value is this field multiplied by two.
1564 */
1565#define BACKLIGHT_MODULATION_FREQ_MASK (0x7fff << 17)
1566#define BLM_LEGACY_MODE (1 << 16)
1557/* 1567/*
1558 * This is the number of cycles out of the backlight modulation cycle for which 1568 * This is the number of cycles out of the backlight modulation cycle for which
1559 * the backlight is on. 1569 * the backlight is on.
@@ -3261,6 +3271,8 @@
3261#define FORCEWAKE 0xA18C 3271#define FORCEWAKE 0xA18C
3262#define FORCEWAKE_ACK 0x130090 3272#define FORCEWAKE_ACK 0x130090
3263 3273
3274#define GT_FIFO_FREE_ENTRIES 0x120008
3275
3264#define GEN6_RPNSWREQ 0xA008 3276#define GEN6_RPNSWREQ 0xA008
3265#define GEN6_TURBO_DISABLE (1<<31) 3277#define GEN6_TURBO_DISABLE (1<<31)
3266#define GEN6_FREQUENCY(x) ((x)<<25) 3278#define GEN6_FREQUENCY(x) ((x)<<25)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3b006536b3d2..49fb54fd9a18 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1219,7 +1219,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
1219 u32 blt_ecoskpd; 1219 u32 blt_ecoskpd;
1220 1220
1221 /* Make sure blitter notifies FBC of writes */ 1221 /* Make sure blitter notifies FBC of writes */
1222 __gen6_force_wake_get(dev_priv); 1222 __gen6_gt_force_wake_get(dev_priv);
1223 blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD); 1223 blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
1224 blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY << 1224 blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
1225 GEN6_BLITTER_LOCK_SHIFT; 1225 GEN6_BLITTER_LOCK_SHIFT;
@@ -1230,7 +1230,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
1230 GEN6_BLITTER_LOCK_SHIFT); 1230 GEN6_BLITTER_LOCK_SHIFT);
1231 I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); 1231 I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
1232 POSTING_READ(GEN6_BLITTER_ECOSKPD); 1232 POSTING_READ(GEN6_BLITTER_ECOSKPD);
1233 __gen6_force_wake_put(dev_priv); 1233 __gen6_gt_force_wake_put(dev_priv);
1234} 1234}
1235 1235
1236static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval) 1236static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
@@ -1630,19 +1630,19 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
1630 struct drm_i915_gem_object *obj = to_intel_framebuffer(old_fb)->obj; 1630 struct drm_i915_gem_object *obj = to_intel_framebuffer(old_fb)->obj;
1631 1631
1632 wait_event(dev_priv->pending_flip_queue, 1632 wait_event(dev_priv->pending_flip_queue,
1633 atomic_read(&dev_priv->mm.wedged) ||
1633 atomic_read(&obj->pending_flip) == 0); 1634 atomic_read(&obj->pending_flip) == 0);
1634 1635
1635 /* Big Hammer, we also need to ensure that any pending 1636 /* Big Hammer, we also need to ensure that any pending
1636 * MI_WAIT_FOR_EVENT inside a user batch buffer on the 1637 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
1637 * current scanout is retired before unpinning the old 1638 * current scanout is retired before unpinning the old
1638 * framebuffer. 1639 * framebuffer.
1640 *
1641 * This should only fail upon a hung GPU, in which case we
1642 * can safely continue.
1639 */ 1643 */
1640 ret = i915_gem_object_flush_gpu(obj, false); 1644 ret = i915_gem_object_flush_gpu(obj, false);
1641 if (ret) { 1645 (void) ret;
1642 i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
1643 mutex_unlock(&dev->struct_mutex);
1644 return ret;
1645 }
1646 } 1646 }
1647 1647
1648 ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y, 1648 ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
@@ -2045,6 +2045,31 @@ static void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
2045 atomic_read(&obj->pending_flip) == 0); 2045 atomic_read(&obj->pending_flip) == 0);
2046} 2046}
2047 2047
2048static bool intel_crtc_driving_pch(struct drm_crtc *crtc)
2049{
2050 struct drm_device *dev = crtc->dev;
2051 struct drm_mode_config *mode_config = &dev->mode_config;
2052 struct intel_encoder *encoder;
2053
2054 /*
2055 * If there's a non-PCH eDP on this crtc, it must be DP_A, and that
2056 * must be driven by its own crtc; no sharing is possible.
2057 */
2058 list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
2059 if (encoder->base.crtc != crtc)
2060 continue;
2061
2062 switch (encoder->type) {
2063 case INTEL_OUTPUT_EDP:
2064 if (!intel_encoder_is_pch_edp(&encoder->base))
2065 return false;
2066 continue;
2067 }
2068 }
2069
2070 return true;
2071}
2072
2048static void ironlake_crtc_enable(struct drm_crtc *crtc) 2073static void ironlake_crtc_enable(struct drm_crtc *crtc)
2049{ 2074{
2050 struct drm_device *dev = crtc->dev; 2075 struct drm_device *dev = crtc->dev;
@@ -2053,6 +2078,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2053 int pipe = intel_crtc->pipe; 2078 int pipe = intel_crtc->pipe;
2054 int plane = intel_crtc->plane; 2079 int plane = intel_crtc->plane;
2055 u32 reg, temp; 2080 u32 reg, temp;
2081 bool is_pch_port = false;
2056 2082
2057 if (intel_crtc->active) 2083 if (intel_crtc->active)
2058 return; 2084 return;
@@ -2066,7 +2092,56 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2066 I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN); 2092 I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN);
2067 } 2093 }
2068 2094
2069 ironlake_fdi_enable(crtc); 2095 is_pch_port = intel_crtc_driving_pch(crtc);
2096
2097 if (is_pch_port)
2098 ironlake_fdi_enable(crtc);
2099 else {
2100 /* disable CPU FDI tx and PCH FDI rx */
2101 reg = FDI_TX_CTL(pipe);
2102 temp = I915_READ(reg);
2103 I915_WRITE(reg, temp & ~FDI_TX_ENABLE);
2104 POSTING_READ(reg);
2105
2106 reg = FDI_RX_CTL(pipe);
2107 temp = I915_READ(reg);
2108 temp &= ~(0x7 << 16);
2109 temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11;
2110 I915_WRITE(reg, temp & ~FDI_RX_ENABLE);
2111
2112 POSTING_READ(reg);
2113 udelay(100);
2114
2115 /* Ironlake workaround, disable clock pointer after downing FDI */
2116 if (HAS_PCH_IBX(dev))
2117 I915_WRITE(FDI_RX_CHICKEN(pipe),
2118 I915_READ(FDI_RX_CHICKEN(pipe) &
2119 ~FDI_RX_PHASE_SYNC_POINTER_ENABLE));
2120
2121 /* still set train pattern 1 */
2122 reg = FDI_TX_CTL(pipe);
2123 temp = I915_READ(reg);
2124 temp &= ~FDI_LINK_TRAIN_NONE;
2125 temp |= FDI_LINK_TRAIN_PATTERN_1;
2126 I915_WRITE(reg, temp);
2127
2128 reg = FDI_RX_CTL(pipe);
2129 temp = I915_READ(reg);
2130 if (HAS_PCH_CPT(dev)) {
2131 temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
2132 temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
2133 } else {
2134 temp &= ~FDI_LINK_TRAIN_NONE;
2135 temp |= FDI_LINK_TRAIN_PATTERN_1;
2136 }
2137 /* BPC in FDI rx is consistent with that in PIPECONF */
2138 temp &= ~(0x07 << 16);
2139 temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11;
2140 I915_WRITE(reg, temp);
2141
2142 POSTING_READ(reg);
2143 udelay(100);
2144 }
2070 2145
2071 /* Enable panel fitting for LVDS */ 2146 /* Enable panel fitting for LVDS */
2072 if (dev_priv->pch_pf_size && 2147 if (dev_priv->pch_pf_size &&
@@ -2100,6 +2175,10 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2100 intel_flush_display_plane(dev, plane); 2175 intel_flush_display_plane(dev, plane);
2101 } 2176 }
2102 2177
2178 /* Skip the PCH stuff if possible */
2179 if (!is_pch_port)
2180 goto done;
2181
2103 /* For PCH output, training FDI link */ 2182 /* For PCH output, training FDI link */
2104 if (IS_GEN6(dev)) 2183 if (IS_GEN6(dev))
2105 gen6_fdi_link_train(crtc); 2184 gen6_fdi_link_train(crtc);
@@ -2184,7 +2263,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
2184 I915_WRITE(reg, temp | TRANS_ENABLE); 2263 I915_WRITE(reg, temp | TRANS_ENABLE);
2185 if (wait_for(I915_READ(reg) & TRANS_STATE_ENABLE, 100)) 2264 if (wait_for(I915_READ(reg) & TRANS_STATE_ENABLE, 100))
2186 DRM_ERROR("failed to enable transcoder %d\n", pipe); 2265 DRM_ERROR("failed to enable transcoder %d\n", pipe);
2187 2266done:
2188 intel_crtc_load_lut(crtc); 2267 intel_crtc_load_lut(crtc);
2189 intel_update_fbc(dev); 2268 intel_update_fbc(dev);
2190 intel_crtc_update_cursor(crtc, true); 2269 intel_crtc_update_cursor(crtc, true);
@@ -6203,7 +6282,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
6203 * userspace... 6282 * userspace...
6204 */ 6283 */
6205 I915_WRITE(GEN6_RC_STATE, 0); 6284 I915_WRITE(GEN6_RC_STATE, 0);
6206 __gen6_force_wake_get(dev_priv); 6285 __gen6_gt_force_wake_get(dev_priv);
6207 6286
6208 /* disable the counters and set deterministic thresholds */ 6287 /* disable the counters and set deterministic thresholds */
6209 I915_WRITE(GEN6_RC_CONTROL, 0); 6288 I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -6301,7 +6380,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
6301 /* enable all PM interrupts */ 6380 /* enable all PM interrupts */
6302 I915_WRITE(GEN6_PMINTRMSK, 0); 6381 I915_WRITE(GEN6_PMINTRMSK, 0);
6303 6382
6304 __gen6_force_wake_put(dev_priv); 6383 __gen6_gt_force_wake_put(dev_priv);
6305} 6384}
6306 6385
6307void intel_enable_clock_gating(struct drm_device *dev) 6386void intel_enable_clock_gating(struct drm_device *dev)
@@ -6496,7 +6575,7 @@ static void ironlake_disable_rc6(struct drm_device *dev)
6496 POSTING_READ(RSTDBYCTL); 6575 POSTING_READ(RSTDBYCTL);
6497 } 6576 }
6498 6577
6499 ironlake_disable_rc6(dev); 6578 ironlake_teardown_rc6(dev);
6500} 6579}
6501 6580
6502static int ironlake_setup_rc6(struct drm_device *dev) 6581static int ironlake_setup_rc6(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index d860abeda70f..f8f86e57df22 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -30,6 +30,8 @@
30 30
31#include "intel_drv.h" 31#include "intel_drv.h"
32 32
33#define PCI_LBPC 0xf4 /* legacy/combination backlight modes */
34
33void 35void
34intel_fixed_panel_mode(struct drm_display_mode *fixed_mode, 36intel_fixed_panel_mode(struct drm_display_mode *fixed_mode,
35 struct drm_display_mode *adjusted_mode) 37 struct drm_display_mode *adjusted_mode)
@@ -110,6 +112,19 @@ done:
110 dev_priv->pch_pf_size = (width << 16) | height; 112 dev_priv->pch_pf_size = (width << 16) | height;
111} 113}
112 114
115static int is_backlight_combination_mode(struct drm_device *dev)
116{
117 struct drm_i915_private *dev_priv = dev->dev_private;
118
119 if (INTEL_INFO(dev)->gen >= 4)
120 return I915_READ(BLC_PWM_CTL2) & BLM_COMBINATION_MODE;
121
122 if (IS_GEN2(dev))
123 return I915_READ(BLC_PWM_CTL) & BLM_LEGACY_MODE;
124
125 return 0;
126}
127
113static u32 i915_read_blc_pwm_ctl(struct drm_i915_private *dev_priv) 128static u32 i915_read_blc_pwm_ctl(struct drm_i915_private *dev_priv)
114{ 129{
115 u32 val; 130 u32 val;
@@ -166,6 +181,9 @@ u32 intel_panel_get_max_backlight(struct drm_device *dev)
166 if (INTEL_INFO(dev)->gen < 4) 181 if (INTEL_INFO(dev)->gen < 4)
167 max &= ~1; 182 max &= ~1;
168 } 183 }
184
185 if (is_backlight_combination_mode(dev))
186 max *= 0xff;
169 } 187 }
170 188
171 DRM_DEBUG_DRIVER("max backlight PWM = %d\n", max); 189 DRM_DEBUG_DRIVER("max backlight PWM = %d\n", max);
@@ -183,6 +201,14 @@ u32 intel_panel_get_backlight(struct drm_device *dev)
183 val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK; 201 val = I915_READ(BLC_PWM_CTL) & BACKLIGHT_DUTY_CYCLE_MASK;
184 if (IS_PINEVIEW(dev)) 202 if (IS_PINEVIEW(dev))
185 val >>= 1; 203 val >>= 1;
204
205 if (is_backlight_combination_mode(dev)){
206 u8 lbpc;
207
208 val &= ~1;
209 pci_read_config_byte(dev->pdev, PCI_LBPC, &lbpc);
210 val *= lbpc;
211 }
186 } 212 }
187 213
188 DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val); 214 DRM_DEBUG_DRIVER("get backlight PWM = %d\n", val);
@@ -205,6 +231,16 @@ void intel_panel_set_backlight(struct drm_device *dev, u32 level)
205 231
206 if (HAS_PCH_SPLIT(dev)) 232 if (HAS_PCH_SPLIT(dev))
207 return intel_pch_panel_set_backlight(dev, level); 233 return intel_pch_panel_set_backlight(dev, level);
234
235 if (is_backlight_combination_mode(dev)){
236 u32 max = intel_panel_get_max_backlight(dev);
237 u8 lbpc;
238
239 lbpc = level * 0xfe / max + 1;
240 level /= lbpc;
241 pci_write_config_byte(dev->pdev, PCI_LBPC, lbpc);
242 }
243
208 tmp = I915_READ(BLC_PWM_CTL); 244 tmp = I915_READ(BLC_PWM_CTL);
209 if (IS_PINEVIEW(dev)) { 245 if (IS_PINEVIEW(dev)) {
210 tmp &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1); 246 tmp &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6d6fde85a636..34306865a5df 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,22 +14,23 @@ struct intel_hw_status_page {
14 struct drm_i915_gem_object *obj; 14 struct drm_i915_gem_object *obj;
15}; 15};
16 16
17#define I915_RING_READ(reg) i915_safe_read(dev_priv, reg) 17#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
18#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
18 19
19#define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base)) 20#define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
20#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val) 21#define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
21 22
22#define I915_READ_START(ring) I915_RING_READ(RING_START((ring)->mmio_base)) 23#define I915_READ_START(ring) I915_RING_READ(RING_START((ring)->mmio_base))
23#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val) 24#define I915_WRITE_START(ring, val) I915_RING_WRITE(RING_START((ring)->mmio_base), val)
24 25
25#define I915_READ_HEAD(ring) I915_RING_READ(RING_HEAD((ring)->mmio_base)) 26#define I915_READ_HEAD(ring) I915_RING_READ(RING_HEAD((ring)->mmio_base))
26#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val) 27#define I915_WRITE_HEAD(ring, val) I915_RING_WRITE(RING_HEAD((ring)->mmio_base), val)
27 28
28#define I915_READ_CTL(ring) I915_RING_READ(RING_CTL((ring)->mmio_base)) 29#define I915_READ_CTL(ring) I915_RING_READ(RING_CTL((ring)->mmio_base))
29#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val) 30#define I915_WRITE_CTL(ring, val) I915_RING_WRITE(RING_CTL((ring)->mmio_base), val)
30 31
31#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
32#define I915_READ_IMR(ring) I915_RING_READ(RING_IMR((ring)->mmio_base)) 32#define I915_READ_IMR(ring) I915_RING_READ(RING_IMR((ring)->mmio_base))
33#define I915_WRITE_IMR(ring, val) I915_RING_WRITE(RING_IMR((ring)->mmio_base), val)
33 34
34#define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID((ring)->mmio_base)) 35#define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID((ring)->mmio_base))
35#define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0((ring)->mmio_base)) 36#define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0((ring)->mmio_base))
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index d38a4d9f9b0b..a52184007f5f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -49,7 +49,10 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
49 DRM_ERROR("bo %p still attached to GEM object\n", bo); 49 DRM_ERROR("bo %p still attached to GEM object\n", bo);
50 50
51 nv10_mem_put_tile_region(dev, nvbo->tile, NULL); 51 nv10_mem_put_tile_region(dev, nvbo->tile, NULL);
52 nouveau_vm_put(&nvbo->vma); 52 if (nvbo->vma.node) {
53 nouveau_vm_unmap(&nvbo->vma);
54 nouveau_vm_put(&nvbo->vma);
55 }
53 kfree(nvbo); 56 kfree(nvbo);
54} 57}
55 58
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 65699bfaaaea..b368ed74aad7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -83,7 +83,8 @@ nouveau_dma_init(struct nouveau_channel *chan)
83 return ret; 83 return ret;
84 84
85 /* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */ 85 /* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
86 ret = nouveau_notifier_alloc(chan, NvNotify0, 32, &chan->m2mf_ntfy); 86 ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfd0, 0x1000,
87 &chan->m2mf_ntfy);
87 if (ret) 88 if (ret)
88 return ret; 89 return ret;
89 90
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 9821fcacc3d2..982d70b12722 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -852,7 +852,8 @@ extern const struct ttm_mem_type_manager_func nouveau_vram_manager;
852extern int nouveau_notifier_init_channel(struct nouveau_channel *); 852extern int nouveau_notifier_init_channel(struct nouveau_channel *);
853extern void nouveau_notifier_takedown_channel(struct nouveau_channel *); 853extern void nouveau_notifier_takedown_channel(struct nouveau_channel *);
854extern int nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle, 854extern int nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle,
855 int cout, uint32_t *offset); 855 int cout, uint32_t start, uint32_t end,
856 uint32_t *offset);
856extern int nouveau_notifier_offset(struct nouveau_gpuobj *, uint32_t *); 857extern int nouveau_notifier_offset(struct nouveau_gpuobj *, uint32_t *);
857extern int nouveau_ioctl_notifier_alloc(struct drm_device *, void *data, 858extern int nouveau_ioctl_notifier_alloc(struct drm_device *, void *data,
858 struct drm_file *); 859 struct drm_file *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 26347b7cd872..b0fb9bdcddb7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -725,8 +725,10 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
725 ret = vram->get(dev, mem->num_pages << PAGE_SHIFT, 725 ret = vram->get(dev, mem->num_pages << PAGE_SHIFT,
726 mem->page_alignment << PAGE_SHIFT, size_nc, 726 mem->page_alignment << PAGE_SHIFT, size_nc,
727 (nvbo->tile_flags >> 8) & 0xff, &node); 727 (nvbo->tile_flags >> 8) & 0xff, &node);
728 if (ret) 728 if (ret) {
729 return ret; 729 mem->mm_node = NULL;
730 return (ret == -ENOSPC) ? 0 : ret;
731 }
730 732
731 node->page_shift = 12; 733 node->page_shift = 12;
732 if (nvbo->vma.node) 734 if (nvbo->vma.node)
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.c b/drivers/gpu/drm/nouveau/nouveau_mm.c
index 8844b50c3e54..7609756b6faf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.c
@@ -123,7 +123,7 @@ nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
123 return 0; 123 return 0;
124 } 124 }
125 125
126 return -ENOMEM; 126 return -ENOSPC;
127} 127}
128 128
129int 129int
diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
index fe29d604b820..5ea167623a82 100644
--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
@@ -96,7 +96,8 @@ nouveau_notifier_gpuobj_dtor(struct drm_device *dev,
96 96
97int 97int
98nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle, 98nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
99 int size, uint32_t *b_offset) 99 int size, uint32_t start, uint32_t end,
100 uint32_t *b_offset)
100{ 101{
101 struct drm_device *dev = chan->dev; 102 struct drm_device *dev = chan->dev;
102 struct nouveau_gpuobj *nobj = NULL; 103 struct nouveau_gpuobj *nobj = NULL;
@@ -104,9 +105,10 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
104 uint32_t offset; 105 uint32_t offset;
105 int target, ret; 106 int target, ret;
106 107
107 mem = drm_mm_search_free(&chan->notifier_heap, size, 0, 0); 108 mem = drm_mm_search_free_in_range(&chan->notifier_heap, size, 0,
109 start, end, 0);
108 if (mem) 110 if (mem)
109 mem = drm_mm_get_block(mem, size, 0); 111 mem = drm_mm_get_block_range(mem, size, 0, start, end);
110 if (!mem) { 112 if (!mem) {
111 NV_ERROR(dev, "Channel %d notifier block full\n", chan->id); 113 NV_ERROR(dev, "Channel %d notifier block full\n", chan->id);
112 return -ENOMEM; 114 return -ENOMEM;
@@ -177,7 +179,8 @@ nouveau_ioctl_notifier_alloc(struct drm_device *dev, void *data,
177 if (IS_ERR(chan)) 179 if (IS_ERR(chan))
178 return PTR_ERR(chan); 180 return PTR_ERR(chan);
179 181
180 ret = nouveau_notifier_alloc(chan, na->handle, na->size, &na->offset); 182 ret = nouveau_notifier_alloc(chan, na->handle, na->size, 0, 0x1000,
183 &na->offset);
181 nouveau_channel_put(&chan); 184 nouveau_channel_put(&chan);
182 return ret; 185 return ret;
183} 186}
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index ea0041810ae3..e57caa2a00e3 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -403,16 +403,24 @@ nv50_instmem_unmap(struct nouveau_gpuobj *gpuobj)
403void 403void
404nv50_instmem_flush(struct drm_device *dev) 404nv50_instmem_flush(struct drm_device *dev)
405{ 405{
406 struct drm_nouveau_private *dev_priv = dev->dev_private;
407
408 spin_lock(&dev_priv->ramin_lock);
406 nv_wr32(dev, 0x00330c, 0x00000001); 409 nv_wr32(dev, 0x00330c, 0x00000001);
407 if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000)) 410 if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000))
408 NV_ERROR(dev, "PRAMIN flush timeout\n"); 411 NV_ERROR(dev, "PRAMIN flush timeout\n");
412 spin_unlock(&dev_priv->ramin_lock);
409} 413}
410 414
411void 415void
412nv84_instmem_flush(struct drm_device *dev) 416nv84_instmem_flush(struct drm_device *dev)
413{ 417{
418 struct drm_nouveau_private *dev_priv = dev->dev_private;
419
420 spin_lock(&dev_priv->ramin_lock);
414 nv_wr32(dev, 0x070000, 0x00000001); 421 nv_wr32(dev, 0x070000, 0x00000001);
415 if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000)) 422 if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000))
416 NV_ERROR(dev, "PRAMIN flush timeout\n"); 423 NV_ERROR(dev, "PRAMIN flush timeout\n");
424 spin_unlock(&dev_priv->ramin_lock);
417} 425}
418 426
diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c
index 459ff08241e5..6144156f255a 100644
--- a/drivers/gpu/drm/nouveau/nv50_vm.c
+++ b/drivers/gpu/drm/nouveau/nv50_vm.c
@@ -169,7 +169,11 @@ nv50_vm_flush(struct nouveau_vm *vm)
169void 169void
170nv50_vm_flush_engine(struct drm_device *dev, int engine) 170nv50_vm_flush_engine(struct drm_device *dev, int engine)
171{ 171{
172 struct drm_nouveau_private *dev_priv = dev->dev_private;
173
174 spin_lock(&dev_priv->ramin_lock);
172 nv_wr32(dev, 0x100c80, (engine << 16) | 1); 175 nv_wr32(dev, 0x100c80, (engine << 16) | 1);
173 if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000)) 176 if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000))
174 NV_ERROR(dev, "vm flush timeout: engine %d\n", engine); 177 NV_ERROR(dev, "vm flush timeout: engine %d\n", engine);
178 spin_unlock(&dev_priv->ramin_lock);
175} 179}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index d270b3ff896b..6140ea1de45a 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2194,7 +2194,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
2194 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; 2194 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2195 } 2195 }
2196 rdev->mc.visible_vram_size = rdev->mc.aper_size; 2196 rdev->mc.visible_vram_size = rdev->mc.aper_size;
2197 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
2198 r700_vram_gtt_location(rdev, &rdev->mc); 2197 r700_vram_gtt_location(rdev, &rdev->mc);
2199 radeon_update_bandwidth_info(rdev); 2198 radeon_update_bandwidth_info(rdev);
2200 2199
@@ -2934,7 +2933,7 @@ static int evergreen_startup(struct radeon_device *rdev)
2934 /* XXX: ontario has problems blitting to gart at the moment */ 2933 /* XXX: ontario has problems blitting to gart at the moment */
2935 if (rdev->family == CHIP_PALM) { 2934 if (rdev->family == CHIP_PALM) {
2936 rdev->asic->copy = NULL; 2935 rdev->asic->copy = NULL;
2937 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 2936 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2938 } 2937 }
2939 2938
2940 /* allocate wb buffer */ 2939 /* allocate wb buffer */
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2adfb03f479b..2be698e78ff2 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -623,7 +623,7 @@ done:
623 dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 623 dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
624 return r; 624 return r;
625 } 625 }
626 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 626 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
627 return 0; 627 return 0;
628} 628}
629 629
@@ -631,7 +631,7 @@ void evergreen_blit_fini(struct radeon_device *rdev)
631{ 631{
632 int r; 632 int r;
633 633
634 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 634 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
635 if (rdev->r600_blit.shader_obj == NULL) 635 if (rdev->r600_blit.shader_obj == NULL)
636 return; 636 return;
637 /* If we can't reserve the bo, unref should be enough to destroy 637 /* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 56deae5bf02e..e372f9e1e5ce 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -70,23 +70,6 @@ MODULE_FIRMWARE(FIRMWARE_R520);
70 70
71void r100_pre_page_flip(struct radeon_device *rdev, int crtc) 71void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
72{ 72{
73 struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc];
74 u32 tmp;
75
76 /* make sure flip is at vb rather than hb */
77 tmp = RREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset);
78 tmp &= ~RADEON_CRTC_OFFSET_FLIP_CNTL;
79 /* make sure pending bit is asserted */
80 tmp |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
81 WREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset, tmp);
82
83 /* set pageflip to happen as late as possible in the vblank interval.
84 * same field for crtc1/2
85 */
86 tmp = RREG32(RADEON_CRTC_GEN_CNTL);
87 tmp &= ~RADEON_CRTC_VSTAT_MODE_MASK;
88 WREG32(RADEON_CRTC_GEN_CNTL, tmp);
89
90 /* enable the pflip int */ 73 /* enable the pflip int */
91 radeon_irq_kms_pflip_irq_get(rdev, crtc); 74 radeon_irq_kms_pflip_irq_get(rdev, crtc);
92} 75}
@@ -1041,7 +1024,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
1041 return r; 1024 return r;
1042 } 1025 }
1043 rdev->cp.ready = true; 1026 rdev->cp.ready = true;
1044 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 1027 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1045 return 0; 1028 return 0;
1046} 1029}
1047 1030
@@ -1059,7 +1042,7 @@ void r100_cp_fini(struct radeon_device *rdev)
1059void r100_cp_disable(struct radeon_device *rdev) 1042void r100_cp_disable(struct radeon_device *rdev)
1060{ 1043{
1061 /* Disable ring */ 1044 /* Disable ring */
1062 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 1045 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1063 rdev->cp.ready = false; 1046 rdev->cp.ready = false;
1064 WREG32(RADEON_CP_CSQ_MODE, 0); 1047 WREG32(RADEON_CP_CSQ_MODE, 0);
1065 WREG32(RADEON_CP_CSQ_CNTL, 0); 1048 WREG32(RADEON_CP_CSQ_CNTL, 0);
@@ -2329,7 +2312,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
2329 /* FIXME we don't use the second aperture yet when we could use it */ 2312 /* FIXME we don't use the second aperture yet when we could use it */
2330 if (rdev->mc.visible_vram_size > rdev->mc.aper_size) 2313 if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
2331 rdev->mc.visible_vram_size = rdev->mc.aper_size; 2314 rdev->mc.visible_vram_size = rdev->mc.aper_size;
2332 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
2333 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE); 2315 config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
2334 if (rdev->flags & RADEON_IS_IGP) { 2316 if (rdev->flags & RADEON_IS_IGP) {
2335 uint32_t tom; 2317 uint32_t tom;
@@ -3490,7 +3472,7 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
3490 track->num_texture = 16; 3472 track->num_texture = 16;
3491 track->maxy = 4096; 3473 track->maxy = 4096;
3492 track->separate_cube = 0; 3474 track->separate_cube = 0;
3493 track->aaresolve = true; 3475 track->aaresolve = false;
3494 track->aa.robj = NULL; 3476 track->aa.robj = NULL;
3495 } 3477 }
3496 3478
@@ -3801,8 +3783,6 @@ static int r100_startup(struct radeon_device *rdev)
3801 r100_mc_program(rdev); 3783 r100_mc_program(rdev);
3802 /* Resume clock */ 3784 /* Resume clock */
3803 r100_clock_startup(rdev); 3785 r100_clock_startup(rdev);
3804 /* Initialize GPU configuration (# pipes, ...) */
3805// r100_gpu_init(rdev);
3806 /* Initialize GART (initialize after TTM so we can allocate 3786 /* Initialize GART (initialize after TTM so we can allocate
3807 * memory through TTM but finalize after TTM) */ 3787 * memory through TTM but finalize after TTM) */
3808 r100_enable_bm(rdev); 3788 r100_enable_bm(rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index de88624d5f87..9b3fad23b76c 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1255,7 +1255,6 @@ int r600_mc_init(struct radeon_device *rdev)
1255 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); 1255 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
1256 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); 1256 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
1257 rdev->mc.visible_vram_size = rdev->mc.aper_size; 1257 rdev->mc.visible_vram_size = rdev->mc.aper_size;
1258 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
1259 r600_vram_gtt_location(rdev, &rdev->mc); 1258 r600_vram_gtt_location(rdev, &rdev->mc);
1260 1259
1261 if (rdev->flags & RADEON_IS_IGP) { 1260 if (rdev->flags & RADEON_IS_IGP) {
@@ -1937,7 +1936,7 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
1937 */ 1936 */
1938void r600_cp_stop(struct radeon_device *rdev) 1937void r600_cp_stop(struct radeon_device *rdev)
1939{ 1938{
1940 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 1939 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1941 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); 1940 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
1942 WREG32(SCRATCH_UMSK, 0); 1941 WREG32(SCRATCH_UMSK, 0);
1943} 1942}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 41f7aafc97c4..df68d91e8190 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -558,7 +558,7 @@ done:
558 dev_err(rdev->dev, "(%d) pin blit object failed\n", r); 558 dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
559 return r; 559 return r;
560 } 560 }
561 rdev->mc.active_vram_size = rdev->mc.real_vram_size; 561 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
562 return 0; 562 return 0;
563} 563}
564 564
@@ -566,7 +566,7 @@ void r600_blit_fini(struct radeon_device *rdev)
566{ 566{
567 int r; 567 int r;
568 568
569 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 569 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
570 if (rdev->r600_blit.shader_obj == NULL) 570 if (rdev->r600_blit.shader_obj == NULL)
571 return; 571 return;
572 /* If we can't reserve the bo, unref should be enough to destroy 572 /* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 56c48b67ef3d..6b3429495118 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -345,7 +345,6 @@ struct radeon_mc {
345 * about vram size near mc fb location */ 345 * about vram size near mc fb location */
346 u64 mc_vram_size; 346 u64 mc_vram_size;
347 u64 visible_vram_size; 347 u64 visible_vram_size;
348 u64 active_vram_size;
349 u64 gtt_size; 348 u64 gtt_size;
350 u64 gtt_start; 349 u64 gtt_start;
351 u64 gtt_end; 350 u64 gtt_end;
@@ -1448,6 +1447,7 @@ extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *m
1448extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); 1447extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
1449extern int radeon_resume_kms(struct drm_device *dev); 1448extern int radeon_resume_kms(struct drm_device *dev);
1450extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state); 1449extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
1450extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
1451 1451
1452/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */ 1452/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */
1453extern bool r600_card_posted(struct radeon_device *rdev); 1453extern bool r600_card_posted(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index e75d63b8e21d..793c5e6026ad 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -834,6 +834,9 @@ static struct radeon_asic sumo_asic = {
834 .pm_finish = &evergreen_pm_finish, 834 .pm_finish = &evergreen_pm_finish,
835 .pm_init_profile = &rs780_pm_init_profile, 835 .pm_init_profile = &rs780_pm_init_profile,
836 .pm_get_dynpm_state = &r600_pm_get_dynpm_state, 836 .pm_get_dynpm_state = &r600_pm_get_dynpm_state,
837 .pre_page_flip = &evergreen_pre_page_flip,
838 .page_flip = &evergreen_page_flip,
839 .post_page_flip = &evergreen_post_page_flip,
837}; 840};
838 841
839static struct radeon_asic btc_asic = { 842static struct radeon_asic btc_asic = {
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 0e657095de7c..3e7e7f9eb781 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -971,7 +971,7 @@ void radeon_compute_pll_legacy(struct radeon_pll *pll,
971 max_fractional_feed_div = pll->max_frac_feedback_div; 971 max_fractional_feed_div = pll->max_frac_feedback_div;
972 } 972 }
973 973
974 for (post_div = min_post_div; post_div <= max_post_div; ++post_div) { 974 for (post_div = max_post_div; post_div >= min_post_div; --post_div) {
975 uint32_t ref_div; 975 uint32_t ref_div;
976 976
977 if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1)) 977 if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1))
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 66324b5bb5ba..cc44bdfec80f 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -113,11 +113,14 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev,
113 u32 tiling_flags = 0; 113 u32 tiling_flags = 0;
114 int ret; 114 int ret;
115 int aligned_size, size; 115 int aligned_size, size;
116 int height = mode_cmd->height;
116 117
117 /* need to align pitch with crtc limits */ 118 /* need to align pitch with crtc limits */
118 mode_cmd->pitch = radeon_align_pitch(rdev, mode_cmd->width, mode_cmd->bpp, fb_tiled) * ((mode_cmd->bpp + 1) / 8); 119 mode_cmd->pitch = radeon_align_pitch(rdev, mode_cmd->width, mode_cmd->bpp, fb_tiled) * ((mode_cmd->bpp + 1) / 8);
119 120
120 size = mode_cmd->pitch * mode_cmd->height; 121 if (rdev->family >= CHIP_R600)
122 height = ALIGN(mode_cmd->height, 8);
123 size = mode_cmd->pitch * height;
121 aligned_size = ALIGN(size, PAGE_SIZE); 124 aligned_size = ALIGN(size, PAGE_SIZE);
122 ret = radeon_gem_object_create(rdev, aligned_size, 0, 125 ret = radeon_gem_object_create(rdev, aligned_size, 0,
123 RADEON_GEM_DOMAIN_VRAM, 126 RADEON_GEM_DOMAIN_VRAM,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index df95eb83dac6..1fe95dfe48c9 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -156,9 +156,12 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
156{ 156{
157 struct radeon_device *rdev = dev->dev_private; 157 struct radeon_device *rdev = dev->dev_private;
158 struct drm_radeon_gem_info *args = data; 158 struct drm_radeon_gem_info *args = data;
159 struct ttm_mem_type_manager *man;
160
161 man = &rdev->mman.bdev.man[TTM_PL_VRAM];
159 162
160 args->vram_size = rdev->mc.real_vram_size; 163 args->vram_size = rdev->mc.real_vram_size;
161 args->vram_visible = rdev->mc.real_vram_size; 164 args->vram_visible = (u64)man->size << PAGE_SHIFT;
162 if (rdev->stollen_vga_memory) 165 if (rdev->stollen_vga_memory)
163 args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory); 166 args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory);
164 args->vram_visible -= radeon_fbdev_total_size(rdev); 167 args->vram_visible -= radeon_fbdev_total_size(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index cf0638c3b7c7..78968b738e88 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -443,7 +443,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
443 (target_fb->bits_per_pixel * 8)); 443 (target_fb->bits_per_pixel * 8));
444 crtc_pitch |= crtc_pitch << 16; 444 crtc_pitch |= crtc_pitch << 16;
445 445
446 446 crtc_offset_cntl |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
447 if (tiling_flags & RADEON_TILING_MACRO) { 447 if (tiling_flags & RADEON_TILING_MACRO) {
448 if (ASIC_IS_R300(rdev)) 448 if (ASIC_IS_R300(rdev))
449 crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN | 449 crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN |
@@ -502,6 +502,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
502 gen_cntl_val = RREG32(gen_cntl_reg); 502 gen_cntl_val = RREG32(gen_cntl_reg);
503 gen_cntl_val &= ~(0xf << 8); 503 gen_cntl_val &= ~(0xf << 8);
504 gen_cntl_val |= (format << 8); 504 gen_cntl_val |= (format << 8);
505 gen_cntl_val &= ~RADEON_CRTC_VSTAT_MODE_MASK;
505 WREG32(gen_cntl_reg, gen_cntl_val); 506 WREG32(gen_cntl_reg, gen_cntl_val);
506 507
507 crtc_offset = (u32)base; 508 crtc_offset = (u32)base;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e5b2cf10cbf4..8389b4c63d12 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -589,6 +589,20 @@ void radeon_ttm_fini(struct radeon_device *rdev)
589 DRM_INFO("radeon: ttm finalized\n"); 589 DRM_INFO("radeon: ttm finalized\n");
590} 590}
591 591
592/* this should only be called at bootup or when userspace
593 * isn't running */
594void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
595{
596 struct ttm_mem_type_manager *man;
597
598 if (!rdev->mman.initialized)
599 return;
600
601 man = &rdev->mman.bdev.man[TTM_PL_VRAM];
602 /* this just adjusts TTM size idea, which sets lpfn to the correct value */
603 man->size = size >> PAGE_SHIFT;
604}
605
592static struct vm_operations_struct radeon_ttm_vm_ops; 606static struct vm_operations_struct radeon_ttm_vm_ops;
593static const struct vm_operations_struct *ttm_vm_ops = NULL; 607static const struct vm_operations_struct *ttm_vm_ops = NULL;
594 608
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 5afe294ed51f..8af4679db23e 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -751,7 +751,6 @@ void rs600_mc_init(struct radeon_device *rdev)
751 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE); 751 rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
752 rdev->mc.mc_vram_size = rdev->mc.real_vram_size; 752 rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
753 rdev->mc.visible_vram_size = rdev->mc.aper_size; 753 rdev->mc.visible_vram_size = rdev->mc.aper_size;
754 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
755 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); 754 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
756 base = RREG32_MC(R_000004_MC_FB_LOCATION); 755 base = RREG32_MC(R_000004_MC_FB_LOCATION);
757 base = G_000004_MC_FB_START(base) << 16; 756 base = G_000004_MC_FB_START(base) << 16;
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 6638c8e4c81b..66c949b7c18c 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -157,7 +157,6 @@ void rs690_mc_init(struct radeon_device *rdev)
157 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0); 157 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
158 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0); 158 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
159 rdev->mc.visible_vram_size = rdev->mc.aper_size; 159 rdev->mc.visible_vram_size = rdev->mc.aper_size;
160 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
161 base = RREG32_MC(R_000100_MCCFG_FB_LOCATION); 160 base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
162 base = G_000100_MC_FB_START(base) << 16; 161 base = G_000100_MC_FB_START(base) << 16;
163 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev); 162 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index d8ba67690656..714ad45757d0 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -307,7 +307,7 @@ static void rv770_mc_program(struct radeon_device *rdev)
307 */ 307 */
308void r700_cp_stop(struct radeon_device *rdev) 308void r700_cp_stop(struct radeon_device *rdev)
309{ 309{
310 rdev->mc.active_vram_size = rdev->mc.visible_vram_size; 310 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
311 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 311 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
312 WREG32(SCRATCH_UMSK, 0); 312 WREG32(SCRATCH_UMSK, 0);
313} 313}
@@ -1123,7 +1123,6 @@ int rv770_mc_init(struct radeon_device *rdev)
1123 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE); 1123 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
1124 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE); 1124 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
1125 rdev->mc.visible_vram_size = rdev->mc.aper_size; 1125 rdev->mc.visible_vram_size = rdev->mc.aper_size;
1126 rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
1127 r700_vram_gtt_location(rdev, &rdev->mc); 1126 r700_vram_gtt_location(rdev, &rdev->mc);
1128 radeon_update_bandwidth_info(rdev); 1127 radeon_update_bandwidth_info(rdev);
1129 1128
diff --git a/drivers/hwmon/ad7414.c b/drivers/hwmon/ad7414.c
index 86d822aa9bbf..d46c0c758ddf 100644
--- a/drivers/hwmon/ad7414.c
+++ b/drivers/hwmon/ad7414.c
@@ -242,6 +242,7 @@ static const struct i2c_device_id ad7414_id[] = {
242 { "ad7414", 0 }, 242 { "ad7414", 0 },
243 {} 243 {}
244}; 244};
245MODULE_DEVICE_TABLE(i2c, ad7414_id);
245 246
246static struct i2c_driver ad7414_driver = { 247static struct i2c_driver ad7414_driver = {
247 .driver = { 248 .driver = {
diff --git a/drivers/hwmon/adt7411.c b/drivers/hwmon/adt7411.c
index f13c843a2964..5cc3e3784b42 100644
--- a/drivers/hwmon/adt7411.c
+++ b/drivers/hwmon/adt7411.c
@@ -334,6 +334,7 @@ static const struct i2c_device_id adt7411_id[] = {
334 { "adt7411", 0 }, 334 { "adt7411", 0 },
335 { } 335 { }
336}; 336};
337MODULE_DEVICE_TABLE(i2c, adt7411_id);
337 338
338static struct i2c_driver adt7411_driver = { 339static struct i2c_driver adt7411_driver = {
339 .driver = { 340 .driver = {
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index 3f49dd376f02..6e06019015a5 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -37,7 +37,7 @@
37#define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */ 37#define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */
38#define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */ 38#define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */
39#define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */ 39#define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */
40#define SIO_LOCK_KEY 0xAA /* Key to diasble Super-I/O */ 40#define SIO_LOCK_KEY 0xAA /* Key to disable Super-I/O */
41 41
42#define SIO_REG_LDSEL 0x07 /* Logical device select */ 42#define SIO_REG_LDSEL 0x07 /* Logical device select */
43#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */ 43#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */
@@ -2111,7 +2111,6 @@ static int f71882fg_remove(struct platform_device *pdev)
2111 int nr_fans = (data->type == f71882fg) ? 4 : 3; 2111 int nr_fans = (data->type == f71882fg) ? 4 : 3;
2112 u8 start_reg = f71882fg_read8(data, F71882FG_REG_START); 2112 u8 start_reg = f71882fg_read8(data, F71882FG_REG_START);
2113 2113
2114 platform_set_drvdata(pdev, NULL);
2115 if (data->hwmon_dev) 2114 if (data->hwmon_dev)
2116 hwmon_device_unregister(data->hwmon_dev); 2115 hwmon_device_unregister(data->hwmon_dev);
2117 2116
@@ -2178,6 +2177,7 @@ static int f71882fg_remove(struct platform_device *pdev)
2178 } 2177 }
2179 } 2178 }
2180 2179
2180 platform_set_drvdata(pdev, NULL);
2181 kfree(data); 2181 kfree(data);
2182 2182
2183 return 0; 2183 return 0;
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 2e067dd2ee51..50ea1f43bdc1 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -29,6 +29,7 @@
29#include <linux/pci.h> 29#include <linux/pci.h>
30#include <linux/mutex.h> 30#include <linux/mutex.h>
31#include <linux/ktime.h> 31#include <linux/ktime.h>
32#include <linux/slab.h>
32 33
33#define PCH_EVENT_SET 0 /* I2C Interrupt Event Set Status */ 34#define PCH_EVENT_SET 0 /* I2C Interrupt Event Set Status */
34#define PCH_EVENT_NONE 1 /* I2C Interrupt Event Clear Status */ 35#define PCH_EVENT_NONE 1 /* I2C Interrupt Event Clear Status */
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index c2ef5ce1f1ff..1b46a9d9f907 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -249,7 +249,7 @@ static struct i2c_adapter ocores_adapter = {
249static int ocores_i2c_of_probe(struct platform_device* pdev, 249static int ocores_i2c_of_probe(struct platform_device* pdev,
250 struct ocores_i2c* i2c) 250 struct ocores_i2c* i2c)
251{ 251{
252 __be32* val; 252 const __be32* val;
253 253
254 val = of_get_property(pdev->dev.of_node, "regstep", NULL); 254 val = of_get_property(pdev->dev.of_node, "regstep", NULL);
255 if (!val) { 255 if (!val) {
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index b605ff3a1fa0..58a58c7eaa17 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -378,9 +378,7 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
378 * REVISIT: Some wkup sources might not be needed. 378 * REVISIT: Some wkup sources might not be needed.
379 */ 379 */
380 dev->westate = OMAP_I2C_WE_ALL; 380 dev->westate = OMAP_I2C_WE_ALL;
381 if (dev->rev < OMAP_I2C_REV_ON_4430) 381 omap_i2c_write_reg(dev, OMAP_I2C_WE_REG, dev->westate);
382 omap_i2c_write_reg(dev, OMAP_I2C_WE_REG,
383 dev->westate);
384 } 382 }
385 } 383 }
386 omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0); 384 omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0);
@@ -847,11 +845,15 @@ complete:
847 dev_err(dev->dev, "Arbitration lost\n"); 845 dev_err(dev->dev, "Arbitration lost\n");
848 err |= OMAP_I2C_STAT_AL; 846 err |= OMAP_I2C_STAT_AL;
849 } 847 }
848 /*
849 * ProDB0017052: Clear ARDY bit twice
850 */
850 if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK | 851 if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK |
851 OMAP_I2C_STAT_AL)) { 852 OMAP_I2C_STAT_AL)) {
852 omap_i2c_ack_stat(dev, stat & 853 omap_i2c_ack_stat(dev, stat &
853 (OMAP_I2C_STAT_RRDY | OMAP_I2C_STAT_RDR | 854 (OMAP_I2C_STAT_RRDY | OMAP_I2C_STAT_RDR |
854 OMAP_I2C_STAT_XRDY | OMAP_I2C_STAT_XDR)); 855 OMAP_I2C_STAT_XRDY | OMAP_I2C_STAT_XDR |
856 OMAP_I2C_STAT_ARDY));
855 omap_i2c_complete_cmd(dev, err); 857 omap_i2c_complete_cmd(dev, err);
856 return IRQ_HANDLED; 858 return IRQ_HANDLED;
857 } 859 }
@@ -1137,12 +1139,41 @@ omap_i2c_remove(struct platform_device *pdev)
1137 return 0; 1139 return 0;
1138} 1140}
1139 1141
1142#ifdef CONFIG_SUSPEND
1143static int omap_i2c_suspend(struct device *dev)
1144{
1145 if (!pm_runtime_suspended(dev))
1146 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
1147 dev->bus->pm->runtime_suspend(dev);
1148
1149 return 0;
1150}
1151
1152static int omap_i2c_resume(struct device *dev)
1153{
1154 if (!pm_runtime_suspended(dev))
1155 if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
1156 dev->bus->pm->runtime_resume(dev);
1157
1158 return 0;
1159}
1160
1161static struct dev_pm_ops omap_i2c_pm_ops = {
1162 .suspend = omap_i2c_suspend,
1163 .resume = omap_i2c_resume,
1164};
1165#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops)
1166#else
1167#define OMAP_I2C_PM_OPS NULL
1168#endif
1169
1140static struct platform_driver omap_i2c_driver = { 1170static struct platform_driver omap_i2c_driver = {
1141 .probe = omap_i2c_probe, 1171 .probe = omap_i2c_probe,
1142 .remove = omap_i2c_remove, 1172 .remove = omap_i2c_remove,
1143 .driver = { 1173 .driver = {
1144 .name = "omap_i2c", 1174 .name = "omap_i2c",
1145 .owner = THIS_MODULE, 1175 .owner = THIS_MODULE,
1176 .pm = OMAP_I2C_PM_OPS,
1146 }, 1177 },
1147}; 1178};
1148 1179
diff --git a/drivers/i2c/busses/i2c-stu300.c b/drivers/i2c/busses/i2c-stu300.c
index 495be451d326..266135ddf7fa 100644
--- a/drivers/i2c/busses/i2c-stu300.c
+++ b/drivers/i2c/busses/i2c-stu300.c
@@ -942,7 +942,7 @@ stu300_probe(struct platform_device *pdev)
942 adap->owner = THIS_MODULE; 942 adap->owner = THIS_MODULE;
943 /* DDC class but actually often used for more generic I2C */ 943 /* DDC class but actually often used for more generic I2C */
944 adap->class = I2C_CLASS_DDC; 944 adap->class = I2C_CLASS_DDC;
945 strncpy(adap->name, "ST Microelectronics DDC I2C adapter", 945 strlcpy(adap->name, "ST Microelectronics DDC I2C adapter",
946 sizeof(adap->name)); 946 sizeof(adap->name));
947 adap->nr = bus_nr; 947 adap->nr = bus_nr;
948 adap->algo = &stu300_algo; 948 adap->algo = &stu300_algo;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 1fa091e05690..4a5c4a44ffb1 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -62,6 +62,7 @@
62#include <linux/notifier.h> 62#include <linux/notifier.h>
63#include <linux/cpu.h> 63#include <linux/cpu.h>
64#include <asm/mwait.h> 64#include <asm/mwait.h>
65#include <asm/msr.h>
65 66
66#define INTEL_IDLE_VERSION "0.4" 67#define INTEL_IDLE_VERSION "0.4"
67#define PREFIX "intel_idle: " 68#define PREFIX "intel_idle: "
@@ -85,6 +86,12 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
85static struct cpuidle_state *cpuidle_state_table; 86static struct cpuidle_state *cpuidle_state_table;
86 87
87/* 88/*
89 * Hardware C-state auto-demotion may not always be optimal.
90 * Indicate which enable bits to clear here.
91 */
92static unsigned long long auto_demotion_disable_flags;
93
94/*
88 * Set this flag for states where the HW flushes the TLB for us 95 * Set this flag for states where the HW flushes the TLB for us
89 * and so we don't need cross-calls to keep it consistent. 96 * and so we don't need cross-calls to keep it consistent.
90 * If this flag is set, SW flushes the TLB, so even if the 97 * If this flag is set, SW flushes the TLB, so even if the
@@ -281,6 +288,15 @@ static struct notifier_block setup_broadcast_notifier = {
281 .notifier_call = setup_broadcast_cpuhp_notify, 288 .notifier_call = setup_broadcast_cpuhp_notify,
282}; 289};
283 290
291static void auto_demotion_disable(void *dummy)
292{
293 unsigned long long msr_bits;
294
295 rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
296 msr_bits &= ~auto_demotion_disable_flags;
297 wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
298}
299
284/* 300/*
285 * intel_idle_probe() 301 * intel_idle_probe()
286 */ 302 */
@@ -324,11 +340,17 @@ static int intel_idle_probe(void)
324 case 0x25: /* Westmere */ 340 case 0x25: /* Westmere */
325 case 0x2C: /* Westmere */ 341 case 0x2C: /* Westmere */
326 cpuidle_state_table = nehalem_cstates; 342 cpuidle_state_table = nehalem_cstates;
343 auto_demotion_disable_flags =
344 (NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE);
327 break; 345 break;
328 346
329 case 0x1C: /* 28 - Atom Processor */ 347 case 0x1C: /* 28 - Atom Processor */
348 cpuidle_state_table = atom_cstates;
349 break;
350
330 case 0x26: /* 38 - Lincroft Atom Processor */ 351 case 0x26: /* 38 - Lincroft Atom Processor */
331 cpuidle_state_table = atom_cstates; 352 cpuidle_state_table = atom_cstates;
353 auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE;
332 break; 354 break;
333 355
334 case 0x2A: /* SNB */ 356 case 0x2A: /* SNB */
@@ -436,6 +458,8 @@ static int intel_idle_cpuidle_devices_init(void)
436 return -EIO; 458 return -EIO;
437 } 459 }
438 } 460 }
461 if (auto_demotion_disable_flags)
462 smp_call_function(auto_demotion_disable, NULL, 1);
439 463
440 return 0; 464 return 0;
441} 465}
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index 23cf8fc933ec..5b8f59d6c3e8 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -360,7 +360,7 @@ static int gameport_queue_event(void *object, struct module *owner,
360 event->owner = owner; 360 event->owner = owner;
361 361
362 list_add_tail(&event->node, &gameport_event_list); 362 list_add_tail(&event->node, &gameport_event_list);
363 schedule_work(&gameport_event_work); 363 queue_work(system_long_wq, &gameport_event_work);
364 364
365out: 365out:
366 spin_unlock_irqrestore(&gameport_event_lock, flags); 366 spin_unlock_irqrestore(&gameport_event_lock, flags);
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index ac471b77c18e..99ce9032d08c 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -71,8 +71,9 @@ struct tegra_kbc {
71 spinlock_t lock; 71 spinlock_t lock;
72 unsigned int repoll_dly; 72 unsigned int repoll_dly;
73 unsigned long cp_dly_jiffies; 73 unsigned long cp_dly_jiffies;
74 bool use_fn_map;
74 const struct tegra_kbc_platform_data *pdata; 75 const struct tegra_kbc_platform_data *pdata;
75 unsigned short keycode[KBC_MAX_KEY]; 76 unsigned short keycode[KBC_MAX_KEY * 2];
76 unsigned short current_keys[KBC_MAX_KPENT]; 77 unsigned short current_keys[KBC_MAX_KPENT];
77 unsigned int num_pressed_keys; 78 unsigned int num_pressed_keys;
78 struct timer_list timer; 79 struct timer_list timer;
@@ -178,6 +179,40 @@ static const u32 tegra_kbc_default_keymap[] = {
178 KEY(15, 5, KEY_F2), 179 KEY(15, 5, KEY_F2),
179 KEY(15, 6, KEY_CAPSLOCK), 180 KEY(15, 6, KEY_CAPSLOCK),
180 KEY(15, 7, KEY_F6), 181 KEY(15, 7, KEY_F6),
182
183 /* Software Handled Function Keys */
184 KEY(20, 0, KEY_KP7),
185
186 KEY(21, 0, KEY_KP9),
187 KEY(21, 1, KEY_KP8),
188 KEY(21, 2, KEY_KP4),
189 KEY(21, 4, KEY_KP1),
190
191 KEY(22, 1, KEY_KPSLASH),
192 KEY(22, 2, KEY_KP6),
193 KEY(22, 3, KEY_KP5),
194 KEY(22, 4, KEY_KP3),
195 KEY(22, 5, KEY_KP2),
196 KEY(22, 7, KEY_KP0),
197
198 KEY(27, 1, KEY_KPASTERISK),
199 KEY(27, 3, KEY_KPMINUS),
200 KEY(27, 4, KEY_KPPLUS),
201 KEY(27, 5, KEY_KPDOT),
202
203 KEY(28, 5, KEY_VOLUMEUP),
204
205 KEY(29, 3, KEY_HOME),
206 KEY(29, 4, KEY_END),
207 KEY(29, 5, KEY_BRIGHTNESSDOWN),
208 KEY(29, 6, KEY_VOLUMEDOWN),
209 KEY(29, 7, KEY_BRIGHTNESSUP),
210
211 KEY(30, 0, KEY_NUMLOCK),
212 KEY(30, 1, KEY_SCROLLLOCK),
213 KEY(30, 2, KEY_MUTE),
214
215 KEY(31, 4, KEY_HELP),
181}; 216};
182 217
183static const struct matrix_keymap_data tegra_kbc_default_keymap_data = { 218static const struct matrix_keymap_data tegra_kbc_default_keymap_data = {
@@ -224,6 +259,7 @@ static void tegra_kbc_report_keys(struct tegra_kbc *kbc)
224 unsigned int i; 259 unsigned int i;
225 unsigned int num_down = 0; 260 unsigned int num_down = 0;
226 unsigned long flags; 261 unsigned long flags;
262 bool fn_keypress = false;
227 263
228 spin_lock_irqsave(&kbc->lock, flags); 264 spin_lock_irqsave(&kbc->lock, flags);
229 for (i = 0; i < KBC_MAX_KPENT; i++) { 265 for (i = 0; i < KBC_MAX_KPENT; i++) {
@@ -237,11 +273,28 @@ static void tegra_kbc_report_keys(struct tegra_kbc *kbc)
237 MATRIX_SCAN_CODE(row, col, KBC_ROW_SHIFT); 273 MATRIX_SCAN_CODE(row, col, KBC_ROW_SHIFT);
238 274
239 scancodes[num_down] = scancode; 275 scancodes[num_down] = scancode;
240 keycodes[num_down++] = kbc->keycode[scancode]; 276 keycodes[num_down] = kbc->keycode[scancode];
277 /* If driver uses Fn map, do not report the Fn key. */
278 if ((keycodes[num_down] == KEY_FN) && kbc->use_fn_map)
279 fn_keypress = true;
280 else
281 num_down++;
241 } 282 }
242 283
243 val >>= 8; 284 val >>= 8;
244 } 285 }
286
287 /*
288 * If the platform uses Fn keymaps, translate keys on a Fn keypress.
289 * Function keycodes are KBC_MAX_KEY apart from the plain keycodes.
290 */
291 if (fn_keypress) {
292 for (i = 0; i < num_down; i++) {
293 scancodes[i] += KBC_MAX_KEY;
294 keycodes[i] = kbc->keycode[scancodes[i]];
295 }
296 }
297
245 spin_unlock_irqrestore(&kbc->lock, flags); 298 spin_unlock_irqrestore(&kbc->lock, flags);
246 299
247 tegra_kbc_report_released_keys(kbc->idev, 300 tegra_kbc_report_released_keys(kbc->idev,
@@ -594,8 +647,11 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev)
594 647
595 input_dev->keycode = kbc->keycode; 648 input_dev->keycode = kbc->keycode;
596 input_dev->keycodesize = sizeof(kbc->keycode[0]); 649 input_dev->keycodesize = sizeof(kbc->keycode[0]);
597 input_dev->keycodemax = ARRAY_SIZE(kbc->keycode); 650 input_dev->keycodemax = KBC_MAX_KEY;
651 if (pdata->use_fn_map)
652 input_dev->keycodemax *= 2;
598 653
654 kbc->use_fn_map = pdata->use_fn_map;
599 keymap_data = pdata->keymap_data ?: &tegra_kbc_default_keymap_data; 655 keymap_data = pdata->keymap_data ?: &tegra_kbc_default_keymap_data;
600 matrix_keypad_build_keymap(keymap_data, KBC_ROW_SHIFT, 656 matrix_keypad_build_keymap(keymap_data, KBC_ROW_SHIFT,
601 input_dev->keycode, input_dev->keybit); 657 input_dev->keycode, input_dev->keybit);
diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h
index 25e5d042a72c..7453938bf5ef 100644
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -51,6 +51,29 @@
51#define SYN_EXT_CAP_REQUESTS(c) (((c) & 0x700000) >> 20) 51#define SYN_EXT_CAP_REQUESTS(c) (((c) & 0x700000) >> 20)
52#define SYN_CAP_MULTI_BUTTON_NO(ec) (((ec) & 0x00f000) >> 12) 52#define SYN_CAP_MULTI_BUTTON_NO(ec) (((ec) & 0x00f000) >> 12)
53#define SYN_CAP_PRODUCT_ID(ec) (((ec) & 0xff0000) >> 16) 53#define SYN_CAP_PRODUCT_ID(ec) (((ec) & 0xff0000) >> 16)
54
55/*
56 * The following describes response for the 0x0c query.
57 *
58 * byte mask name meaning
59 * ---- ---- ------- ------------
60 * 1 0x01 adjustable threshold capacitive button sensitivity
61 * can be adjusted
62 * 1 0x02 report max query 0x0d gives max coord reported
63 * 1 0x04 clearpad sensor is ClearPad product
64 * 1 0x08 advanced gesture not particularly meaningful
65 * 1 0x10 clickpad bit 0 1-button ClickPad
66 * 1 0x60 multifinger mode identifies firmware finger counting
67 * (not reporting!) algorithm.
68 * Not particularly meaningful
69 * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered
70 * 2 0x01 clickpad bit 1 2-button ClickPad
71 * 2 0x02 deluxe LED controls touchpad support LED commands
72 * ala multimedia control bar
73 * 2 0x04 reduced filtering firmware does less filtering on
74 * position data, driver should watch
75 * for noise.
76 */
54#define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */ 77#define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */
55#define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */ 78#define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */
56#define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & 0x020000) 79#define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & 0x020000)
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 7c38d1fbabf2..ba70058e2be3 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -299,7 +299,7 @@ static int serio_queue_event(void *object, struct module *owner,
299 event->owner = owner; 299 event->owner = owner;
300 300
301 list_add_tail(&event->node, &serio_event_list); 301 list_add_tail(&event->node, &serio_event_list);
302 schedule_work(&serio_event_work); 302 queue_work(system_long_wq, &serio_event_work);
303 303
304out: 304out:
305 spin_unlock_irqrestore(&serio_event_lock, flags); 305 spin_unlock_irqrestore(&serio_event_lock, flags);
diff --git a/drivers/isdn/hardware/eicon/istream.c b/drivers/isdn/hardware/eicon/istream.c
index 18f8798442fa..7bd5baa547be 100644
--- a/drivers/isdn/hardware/eicon/istream.c
+++ b/drivers/isdn/hardware/eicon/istream.c
@@ -62,7 +62,7 @@ void diva_xdi_provide_istream_info (ADAPTER* a,
62 stream interface. 62 stream interface.
63 If synchronous service was requested, then function 63 If synchronous service was requested, then function
64 does return amount of data written to stream. 64 does return amount of data written to stream.
65 'final' does indicate that pice of data to be written is 65 'final' does indicate that piece of data to be written is
66 final part of frame (necessary only by structured datatransfer) 66 final part of frame (necessary only by structured datatransfer)
67 return 0 if zero lengh packet was written 67 return 0 if zero lengh packet was written
68 return -1 if stream is full 68 return -1 if stream is full
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 8a2f767f26d8..0ed7f6bc2a7f 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -216,7 +216,6 @@ static int linear_run (mddev_t *mddev)
216 216
217 if (md_check_no_bitmap(mddev)) 217 if (md_check_no_bitmap(mddev))
218 return -EINVAL; 218 return -EINVAL;
219 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
220 conf = linear_conf(mddev, mddev->raid_disks); 219 conf = linear_conf(mddev, mddev->raid_disks);
221 220
222 if (!conf) 221 if (!conf)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0cc30ecda4c1..818313e277e7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -553,6 +553,9 @@ static mddev_t * mddev_find(dev_t unit)
553{ 553{
554 mddev_t *mddev, *new = NULL; 554 mddev_t *mddev, *new = NULL;
555 555
556 if (unit && MAJOR(unit) != MD_MAJOR)
557 unit &= ~((1<<MdpMinorShift)-1);
558
556 retry: 559 retry:
557 spin_lock(&all_mddevs_lock); 560 spin_lock(&all_mddevs_lock);
558 561
@@ -4138,10 +4141,10 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
4138 } 4141 }
4139 4142
4140 mddev->array_sectors = sectors; 4143 mddev->array_sectors = sectors;
4141 set_capacity(mddev->gendisk, mddev->array_sectors); 4144 if (mddev->pers) {
4142 if (mddev->pers) 4145 set_capacity(mddev->gendisk, mddev->array_sectors);
4143 revalidate_disk(mddev->gendisk); 4146 revalidate_disk(mddev->gendisk);
4144 4147 }
4145 return len; 4148 return len;
4146} 4149}
4147 4150
@@ -4624,6 +4627,7 @@ static int do_md_run(mddev_t *mddev)
4624 } 4627 }
4625 set_capacity(mddev->gendisk, mddev->array_sectors); 4628 set_capacity(mddev->gendisk, mddev->array_sectors);
4626 revalidate_disk(mddev->gendisk); 4629 revalidate_disk(mddev->gendisk);
4630 mddev->changed = 1;
4627 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); 4631 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
4628out: 4632out:
4629 return err; 4633 return err;
@@ -4712,6 +4716,7 @@ static void md_clean(mddev_t *mddev)
4712 mddev->sync_speed_min = mddev->sync_speed_max = 0; 4716 mddev->sync_speed_min = mddev->sync_speed_max = 0;
4713 mddev->recovery = 0; 4717 mddev->recovery = 0;
4714 mddev->in_sync = 0; 4718 mddev->in_sync = 0;
4719 mddev->changed = 0;
4715 mddev->degraded = 0; 4720 mddev->degraded = 0;
4716 mddev->safemode = 0; 4721 mddev->safemode = 0;
4717 mddev->bitmap_info.offset = 0; 4722 mddev->bitmap_info.offset = 0;
@@ -4827,6 +4832,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4827 4832
4828 set_capacity(disk, 0); 4833 set_capacity(disk, 0);
4829 mutex_unlock(&mddev->open_mutex); 4834 mutex_unlock(&mddev->open_mutex);
4835 mddev->changed = 1;
4830 revalidate_disk(disk); 4836 revalidate_disk(disk);
4831 4837
4832 if (mddev->ro) 4838 if (mddev->ro)
@@ -6011,7 +6017,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
6011 atomic_inc(&mddev->openers); 6017 atomic_inc(&mddev->openers);
6012 mutex_unlock(&mddev->open_mutex); 6018 mutex_unlock(&mddev->open_mutex);
6013 6019
6014 check_disk_size_change(mddev->gendisk, bdev); 6020 check_disk_change(bdev);
6015 out: 6021 out:
6016 return err; 6022 return err;
6017} 6023}
@@ -6026,6 +6032,21 @@ static int md_release(struct gendisk *disk, fmode_t mode)
6026 6032
6027 return 0; 6033 return 0;
6028} 6034}
6035
6036static int md_media_changed(struct gendisk *disk)
6037{
6038 mddev_t *mddev = disk->private_data;
6039
6040 return mddev->changed;
6041}
6042
6043static int md_revalidate(struct gendisk *disk)
6044{
6045 mddev_t *mddev = disk->private_data;
6046
6047 mddev->changed = 0;
6048 return 0;
6049}
6029static const struct block_device_operations md_fops = 6050static const struct block_device_operations md_fops =
6030{ 6051{
6031 .owner = THIS_MODULE, 6052 .owner = THIS_MODULE,
@@ -6036,6 +6057,8 @@ static const struct block_device_operations md_fops =
6036 .compat_ioctl = md_compat_ioctl, 6057 .compat_ioctl = md_compat_ioctl,
6037#endif 6058#endif
6038 .getgeo = md_getgeo, 6059 .getgeo = md_getgeo,
6060 .media_changed = md_media_changed,
6061 .revalidate_disk= md_revalidate,
6039}; 6062};
6040 6063
6041static int md_thread(void * arg) 6064static int md_thread(void * arg)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 7e90b8593b2a..12215d437fcc 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -274,6 +274,8 @@ struct mddev_s
274 atomic_t active; /* general refcount */ 274 atomic_t active; /* general refcount */
275 atomic_t openers; /* number of active opens */ 275 atomic_t openers; /* number of active opens */
276 276
277 int changed; /* True if we might need to
278 * reread partition info */
277 int degraded; /* whether md should consider 279 int degraded; /* whether md should consider
278 * adding a spare 280 * adding a spare
279 */ 281 */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 6d7ddf32ef2e..3a62d440e27b 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -435,7 +435,6 @@ static int multipath_run (mddev_t *mddev)
435 * bookkeeping area. [whatever we allocate in multipath_run(), 435 * bookkeeping area. [whatever we allocate in multipath_run(),
436 * should be freed in multipath_stop()] 436 * should be freed in multipath_stop()]
437 */ 437 */
438 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
439 438
440 conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); 439 conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
441 mddev->private = conf; 440 mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 637a96855edb..c0ac457f1218 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -361,7 +361,6 @@ static int raid0_run(mddev_t *mddev)
361 if (md_check_no_bitmap(mddev)) 361 if (md_check_no_bitmap(mddev))
362 return -EINVAL; 362 return -EINVAL;
363 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); 363 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
364 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
365 364
366 /* if private is not null, we are here after takeover */ 365 /* if private is not null, we are here after takeover */
367 if (mddev->private == NULL) { 366 if (mddev->private == NULL) {
@@ -670,6 +669,7 @@ static void *raid0_takeover_raid1(mddev_t *mddev)
670 mddev->new_layout = 0; 669 mddev->new_layout = 0;
671 mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */ 670 mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
672 mddev->delta_disks = 1 - mddev->raid_disks; 671 mddev->delta_disks = 1 - mddev->raid_disks;
672 mddev->raid_disks = 1;
673 /* make sure it will be not marked as dirty */ 673 /* make sure it will be not marked as dirty */
674 mddev->recovery_cp = MaxSector; 674 mddev->recovery_cp = MaxSector;
675 675
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a23ffa397ba9..06cd712807d0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -593,7 +593,10 @@ static int flush_pending_writes(conf_t *conf)
593 if (conf->pending_bio_list.head) { 593 if (conf->pending_bio_list.head) {
594 struct bio *bio; 594 struct bio *bio;
595 bio = bio_list_get(&conf->pending_bio_list); 595 bio = bio_list_get(&conf->pending_bio_list);
596 /* Only take the spinlock to quiet a warning */
597 spin_lock(conf->mddev->queue->queue_lock);
596 blk_remove_plug(conf->mddev->queue); 598 blk_remove_plug(conf->mddev->queue);
599 spin_unlock(conf->mddev->queue->queue_lock);
597 spin_unlock_irq(&conf->device_lock); 600 spin_unlock_irq(&conf->device_lock);
598 /* flush any pending bitmap writes to 601 /* flush any pending bitmap writes to
599 * disk before proceeding w/ I/O */ 602 * disk before proceeding w/ I/O */
@@ -959,7 +962,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
959 atomic_inc(&r1_bio->remaining); 962 atomic_inc(&r1_bio->remaining);
960 spin_lock_irqsave(&conf->device_lock, flags); 963 spin_lock_irqsave(&conf->device_lock, flags);
961 bio_list_add(&conf->pending_bio_list, mbio); 964 bio_list_add(&conf->pending_bio_list, mbio);
962 blk_plug_device(mddev->queue); 965 blk_plug_device_unlocked(mddev->queue);
963 spin_unlock_irqrestore(&conf->device_lock, flags); 966 spin_unlock_irqrestore(&conf->device_lock, flags);
964 } 967 }
965 r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); 968 r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
@@ -2021,7 +2024,6 @@ static int run(mddev_t *mddev)
2021 if (IS_ERR(conf)) 2024 if (IS_ERR(conf))
2022 return PTR_ERR(conf); 2025 return PTR_ERR(conf);
2023 2026
2024 mddev->queue->queue_lock = &conf->device_lock;
2025 list_for_each_entry(rdev, &mddev->disks, same_set) { 2027 list_for_each_entry(rdev, &mddev->disks, same_set) {
2026 disk_stack_limits(mddev->gendisk, rdev->bdev, 2028 disk_stack_limits(mddev->gendisk, rdev->bdev,
2027 rdev->data_offset << 9); 2029 rdev->data_offset << 9);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3b607b28741b..747d061d8e05 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -662,7 +662,10 @@ static int flush_pending_writes(conf_t *conf)
662 if (conf->pending_bio_list.head) { 662 if (conf->pending_bio_list.head) {
663 struct bio *bio; 663 struct bio *bio;
664 bio = bio_list_get(&conf->pending_bio_list); 664 bio = bio_list_get(&conf->pending_bio_list);
665 /* Spinlock only taken to quiet a warning */
666 spin_lock(conf->mddev->queue->queue_lock);
665 blk_remove_plug(conf->mddev->queue); 667 blk_remove_plug(conf->mddev->queue);
668 spin_unlock(conf->mddev->queue->queue_lock);
666 spin_unlock_irq(&conf->device_lock); 669 spin_unlock_irq(&conf->device_lock);
667 /* flush any pending bitmap writes to disk 670 /* flush any pending bitmap writes to disk
668 * before proceeding w/ I/O */ 671 * before proceeding w/ I/O */
@@ -971,7 +974,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
971 atomic_inc(&r10_bio->remaining); 974 atomic_inc(&r10_bio->remaining);
972 spin_lock_irqsave(&conf->device_lock, flags); 975 spin_lock_irqsave(&conf->device_lock, flags);
973 bio_list_add(&conf->pending_bio_list, mbio); 976 bio_list_add(&conf->pending_bio_list, mbio);
974 blk_plug_device(mddev->queue); 977 blk_plug_device_unlocked(mddev->queue);
975 spin_unlock_irqrestore(&conf->device_lock, flags); 978 spin_unlock_irqrestore(&conf->device_lock, flags);
976 } 979 }
977 980
@@ -2304,8 +2307,6 @@ static int run(mddev_t *mddev)
2304 if (!conf) 2307 if (!conf)
2305 goto out; 2308 goto out;
2306 2309
2307 mddev->queue->queue_lock = &conf->device_lock;
2308
2309 mddev->thread = conf->thread; 2310 mddev->thread = conf->thread;
2310 conf->thread = NULL; 2311 conf->thread = NULL;
2311 2312
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 702812824195..78536fdbd87f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5204,7 +5204,6 @@ static int run(mddev_t *mddev)
5204 5204
5205 mddev->queue->backing_dev_info.congested_data = mddev; 5205 mddev->queue->backing_dev_info.congested_data = mddev;
5206 mddev->queue->backing_dev_info.congested_fn = raid5_congested; 5206 mddev->queue->backing_dev_info.congested_fn = raid5_congested;
5207 mddev->queue->queue_lock = &conf->device_lock;
5208 mddev->queue->unplug_fn = raid5_unplug_queue; 5207 mddev->queue->unplug_fn = raid5_unplug_queue;
5209 5208
5210 chunk_size = mddev->chunk_sectors << 9; 5209 chunk_size = mddev->chunk_sectors << 9;
diff --git a/drivers/media/common/tuners/tda8290.c b/drivers/media/common/tuners/tda8290.c
index bc6a67768af1..8c4852114eeb 100644
--- a/drivers/media/common/tuners/tda8290.c
+++ b/drivers/media/common/tuners/tda8290.c
@@ -658,13 +658,13 @@ static int tda8290_probe(struct tuner_i2c_props *i2c_props)
658#define TDA8290_ID 0x89 658#define TDA8290_ID 0x89
659 u8 reg = 0x1f, id; 659 u8 reg = 0x1f, id;
660 struct i2c_msg msg_read[] = { 660 struct i2c_msg msg_read[] = {
661 { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg }, 661 { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
662 { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id }, 662 { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
663 }; 663 };
664 664
665 /* detect tda8290 */ 665 /* detect tda8290 */
666 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) { 666 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
667 printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n", 667 printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
668 __func__, reg); 668 __func__, reg);
669 return -ENODEV; 669 return -ENODEV;
670 } 670 }
@@ -685,13 +685,13 @@ static int tda8295_probe(struct tuner_i2c_props *i2c_props)
685#define TDA8295C2_ID 0x8b 685#define TDA8295C2_ID 0x8b
686 u8 reg = 0x2f, id; 686 u8 reg = 0x2f, id;
687 struct i2c_msg msg_read[] = { 687 struct i2c_msg msg_read[] = {
688 { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg }, 688 { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
689 { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id }, 689 { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
690 }; 690 };
691 691
692 /* detect tda8290 */ 692 /* detect tda8295 */
693 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) { 693 if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
694 printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n", 694 printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
695 __func__, reg); 695 __func__, reg);
696 return -ENODEV; 696 return -ENODEV;
697 } 697 }
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index defd83964ce2..193cdb77b76a 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -870,6 +870,23 @@ static int dib7070p_tuner_attach(struct dvb_usb_adapter *adap)
870 return 0; 870 return 0;
871} 871}
872 872
873static int stk7700p_pid_filter(struct dvb_usb_adapter *adapter, int index,
874 u16 pid, int onoff)
875{
876 struct dib0700_state *st = adapter->dev->priv;
877 if (st->is_dib7000pc)
878 return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
879 return dib7000m_pid_filter(adapter->fe, index, pid, onoff);
880}
881
882static int stk7700p_pid_filter_ctrl(struct dvb_usb_adapter *adapter, int onoff)
883{
884 struct dib0700_state *st = adapter->dev->priv;
885 if (st->is_dib7000pc)
886 return dib7000p_pid_filter_ctrl(adapter->fe, onoff);
887 return dib7000m_pid_filter_ctrl(adapter->fe, onoff);
888}
889
873static int stk70x0p_pid_filter(struct dvb_usb_adapter *adapter, int index, u16 pid, int onoff) 890static int stk70x0p_pid_filter(struct dvb_usb_adapter *adapter, int index, u16 pid, int onoff)
874{ 891{
875 return dib7000p_pid_filter(adapter->fe, index, pid, onoff); 892 return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
@@ -1875,8 +1892,8 @@ struct dvb_usb_device_properties dib0700_devices[] = {
1875 { 1892 {
1876 .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, 1893 .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF,
1877 .pid_filter_count = 32, 1894 .pid_filter_count = 32,
1878 .pid_filter = stk70x0p_pid_filter, 1895 .pid_filter = stk7700p_pid_filter,
1879 .pid_filter_ctrl = stk70x0p_pid_filter_ctrl, 1896 .pid_filter_ctrl = stk7700p_pid_filter_ctrl,
1880 .frontend_attach = stk7700p_frontend_attach, 1897 .frontend_attach = stk7700p_frontend_attach,
1881 .tuner_attach = stk7700p_tuner_attach, 1898 .tuner_attach = stk7700p_tuner_attach,
1882 1899
diff --git a/drivers/media/dvb/dvb-usb/lmedm04.c b/drivers/media/dvb/dvb-usb/lmedm04.c
index 9eea4188303b..46ccd01a7696 100644
--- a/drivers/media/dvb/dvb-usb/lmedm04.c
+++ b/drivers/media/dvb/dvb-usb/lmedm04.c
@@ -659,7 +659,7 @@ static int lme2510_download_firmware(struct usb_device *dev,
659} 659}
660 660
661/* Default firmware for LME2510C */ 661/* Default firmware for LME2510C */
662const char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw"; 662char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw";
663 663
664static void lme_coldreset(struct usb_device *dev) 664static void lme_coldreset(struct usb_device *dev)
665{ 665{
@@ -1006,7 +1006,7 @@ static struct dvb_usb_device_properties lme2510c_properties = {
1006 .caps = DVB_USB_IS_AN_I2C_ADAPTER, 1006 .caps = DVB_USB_IS_AN_I2C_ADAPTER,
1007 .usb_ctrl = DEVICE_SPECIFIC, 1007 .usb_ctrl = DEVICE_SPECIFIC,
1008 .download_firmware = lme2510_download_firmware, 1008 .download_firmware = lme2510_download_firmware,
1009 .firmware = lme_firmware, 1009 .firmware = (const char *)&lme_firmware,
1010 .size_of_priv = sizeof(struct lme2510_state), 1010 .size_of_priv = sizeof(struct lme2510_state),
1011 .num_adapters = 1, 1011 .num_adapters = 1,
1012 .adapter = { 1012 .adapter = {
@@ -1109,5 +1109,5 @@ module_exit(lme2510_module_exit);
1109 1109
1110MODULE_AUTHOR("Malcolm Priestley <tvboxspy@gmail.com>"); 1110MODULE_AUTHOR("Malcolm Priestley <tvboxspy@gmail.com>");
1111MODULE_DESCRIPTION("LME2510(C) DVB-S USB2.0"); 1111MODULE_DESCRIPTION("LME2510(C) DVB-S USB2.0");
1112MODULE_VERSION("1.74"); 1112MODULE_VERSION("1.75");
1113MODULE_LICENSE("GPL"); 1113MODULE_LICENSE("GPL");
diff --git a/drivers/media/dvb/frontends/dib7000m.c b/drivers/media/dvb/frontends/dib7000m.c
index c7f5ccf54aa5..289a79837f24 100644
--- a/drivers/media/dvb/frontends/dib7000m.c
+++ b/drivers/media/dvb/frontends/dib7000m.c
@@ -1285,6 +1285,25 @@ struct i2c_adapter * dib7000m_get_i2c_master(struct dvb_frontend *demod, enum di
1285} 1285}
1286EXPORT_SYMBOL(dib7000m_get_i2c_master); 1286EXPORT_SYMBOL(dib7000m_get_i2c_master);
1287 1287
1288int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff)
1289{
1290 struct dib7000m_state *state = fe->demodulator_priv;
1291 u16 val = dib7000m_read_word(state, 294 + state->reg_offs) & 0xffef;
1292 val |= (onoff & 0x1) << 4;
1293 dprintk("PID filter enabled %d", onoff);
1294 return dib7000m_write_word(state, 294 + state->reg_offs, val);
1295}
1296EXPORT_SYMBOL(dib7000m_pid_filter_ctrl);
1297
1298int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id, u16 pid, u8 onoff)
1299{
1300 struct dib7000m_state *state = fe->demodulator_priv;
1301 dprintk("PID filter: index %x, PID %d, OnOff %d", id, pid, onoff);
1302 return dib7000m_write_word(state, 300 + state->reg_offs + id,
1303 onoff ? (1 << 13) | pid : 0);
1304}
1305EXPORT_SYMBOL(dib7000m_pid_filter);
1306
1288#if 0 1307#if 0
1289/* used with some prototype boards */ 1308/* used with some prototype boards */
1290int dib7000m_i2c_enumeration(struct i2c_adapter *i2c, int no_of_demods, 1309int dib7000m_i2c_enumeration(struct i2c_adapter *i2c, int no_of_demods,
diff --git a/drivers/media/dvb/frontends/dib7000m.h b/drivers/media/dvb/frontends/dib7000m.h
index 113819ce9f0d..81fcf2241c64 100644
--- a/drivers/media/dvb/frontends/dib7000m.h
+++ b/drivers/media/dvb/frontends/dib7000m.h
@@ -46,6 +46,8 @@ extern struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
46extern struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *, 46extern struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *,
47 enum dibx000_i2c_interface, 47 enum dibx000_i2c_interface,
48 int); 48 int);
49extern int dib7000m_pid_filter(struct dvb_frontend *, u8 id, u16 pid, u8 onoff);
50extern int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff);
49#else 51#else
50static inline 52static inline
51struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap, 53struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
@@ -63,6 +65,19 @@ struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *demod,
63 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__); 65 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
64 return NULL; 66 return NULL;
65} 67}
68static inline int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id,
69 u16 pid, u8 onoff)
70{
71 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
72 return -ENODEV;
73}
74
75static inline int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe,
76 uint8_t onoff)
77{
78 printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
79 return -ENODEV;
80}
66#endif 81#endif
67 82
68/* TODO 83/* TODO
diff --git a/drivers/media/dvb/mantis/mantis_pci.c b/drivers/media/dvb/mantis/mantis_pci.c
index 59feeb84aec7..10a432a79d00 100644
--- a/drivers/media/dvb/mantis/mantis_pci.c
+++ b/drivers/media/dvb/mantis/mantis_pci.c
@@ -22,7 +22,6 @@
22#include <linux/moduleparam.h> 22#include <linux/moduleparam.h>
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <asm/io.h> 24#include <asm/io.h>
25#include <asm/pgtable.h>
26#include <asm/page.h> 25#include <asm/page.h>
27#include <linux/kmod.h> 26#include <linux/kmod.h>
28#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
diff --git a/drivers/media/rc/ir-raw.c b/drivers/media/rc/ir-raw.c
index 73230ff93b8a..01f258a2a57a 100644
--- a/drivers/media/rc/ir-raw.c
+++ b/drivers/media/rc/ir-raw.c
@@ -112,7 +112,7 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
112{ 112{
113 ktime_t now; 113 ktime_t now;
114 s64 delta; /* ns */ 114 s64 delta; /* ns */
115 struct ir_raw_event ev; 115 DEFINE_IR_RAW_EVENT(ev);
116 int rc = 0; 116 int rc = 0;
117 117
118 if (!dev->raw) 118 if (!dev->raw)
@@ -125,7 +125,6 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
125 * being called for the first time, note that delta can't 125 * being called for the first time, note that delta can't
126 * possibly be negative. 126 * possibly be negative.
127 */ 127 */
128 ev.duration = 0;
129 if (delta > IR_MAX_DURATION || !dev->raw->last_type) 128 if (delta > IR_MAX_DURATION || !dev->raw->last_type)
130 type |= IR_START_EVENT; 129 type |= IR_START_EVENT;
131 else 130 else
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 6df0a4980645..e4f8eac7f717 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -148,6 +148,7 @@ enum mceusb_model_type {
148 MCE_GEN2_TX_INV, 148 MCE_GEN2_TX_INV,
149 POLARIS_EVK, 149 POLARIS_EVK,
150 CX_HYBRID_TV, 150 CX_HYBRID_TV,
151 MULTIFUNCTION,
151}; 152};
152 153
153struct mceusb_model { 154struct mceusb_model {
@@ -155,9 +156,10 @@ struct mceusb_model {
155 u32 mce_gen2:1; 156 u32 mce_gen2:1;
156 u32 mce_gen3:1; 157 u32 mce_gen3:1;
157 u32 tx_mask_normal:1; 158 u32 tx_mask_normal:1;
158 u32 is_polaris:1;
159 u32 no_tx:1; 159 u32 no_tx:1;
160 160
161 int ir_intfnum;
162
161 const char *rc_map; /* Allow specify a per-board map */ 163 const char *rc_map; /* Allow specify a per-board map */
162 const char *name; /* per-board name */ 164 const char *name; /* per-board name */
163}; 165};
@@ -179,7 +181,6 @@ static const struct mceusb_model mceusb_model[] = {
179 .tx_mask_normal = 1, 181 .tx_mask_normal = 1,
180 }, 182 },
181 [POLARIS_EVK] = { 183 [POLARIS_EVK] = {
182 .is_polaris = 1,
183 /* 184 /*
184 * In fact, the EVK is shipped without 185 * In fact, the EVK is shipped without
185 * remotes, but we should have something handy, 186 * remotes, but we should have something handy,
@@ -189,10 +190,13 @@ static const struct mceusb_model mceusb_model[] = {
189 .name = "Conexant Hybrid TV (cx231xx) MCE IR", 190 .name = "Conexant Hybrid TV (cx231xx) MCE IR",
190 }, 191 },
191 [CX_HYBRID_TV] = { 192 [CX_HYBRID_TV] = {
192 .is_polaris = 1,
193 .no_tx = 1, /* tx isn't wired up at all */ 193 .no_tx = 1, /* tx isn't wired up at all */
194 .name = "Conexant Hybrid TV (cx231xx) MCE IR", 194 .name = "Conexant Hybrid TV (cx231xx) MCE IR",
195 }, 195 },
196 [MULTIFUNCTION] = {
197 .mce_gen2 = 1,
198 .ir_intfnum = 2,
199 },
196}; 200};
197 201
198static struct usb_device_id mceusb_dev_table[] = { 202static struct usb_device_id mceusb_dev_table[] = {
@@ -216,8 +220,9 @@ static struct usb_device_id mceusb_dev_table[] = {
216 { USB_DEVICE(VENDOR_PHILIPS, 0x206c) }, 220 { USB_DEVICE(VENDOR_PHILIPS, 0x206c) },
217 /* Philips/Spinel plus IR transceiver for ASUS */ 221 /* Philips/Spinel plus IR transceiver for ASUS */
218 { USB_DEVICE(VENDOR_PHILIPS, 0x2088) }, 222 { USB_DEVICE(VENDOR_PHILIPS, 0x2088) },
219 /* Realtek MCE IR Receiver */ 223 /* Realtek MCE IR Receiver and card reader */
220 { USB_DEVICE(VENDOR_REALTEK, 0x0161) }, 224 { USB_DEVICE(VENDOR_REALTEK, 0x0161),
225 .driver_info = MULTIFUNCTION },
221 /* SMK/Toshiba G83C0004D410 */ 226 /* SMK/Toshiba G83C0004D410 */
222 { USB_DEVICE(VENDOR_SMK, 0x031d), 227 { USB_DEVICE(VENDOR_SMK, 0x031d),
223 .driver_info = MCE_GEN2_TX_INV }, 228 .driver_info = MCE_GEN2_TX_INV },
@@ -1101,7 +1106,7 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1101 bool is_gen3; 1106 bool is_gen3;
1102 bool is_microsoft_gen1; 1107 bool is_microsoft_gen1;
1103 bool tx_mask_normal; 1108 bool tx_mask_normal;
1104 bool is_polaris; 1109 int ir_intfnum;
1105 1110
1106 dev_dbg(&intf->dev, "%s called\n", __func__); 1111 dev_dbg(&intf->dev, "%s called\n", __func__);
1107 1112
@@ -1110,13 +1115,11 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
1110 is_gen3 = mceusb_model[model].mce_gen3; 1115 is_gen3 = mceusb_model[model].mce_gen3;
1111 is_microsoft_gen1 = mceusb_model[model].mce_gen1; 1116 is_microsoft_gen1 = mceusb_model[model].mce_gen1;
1112 tx_mask_normal = mceusb_model[model].tx_mask_normal; 1117 tx_mask_normal = mceusb_model[model].tx_mask_normal;
1113 is_polaris = mceusb_model[model].is_polaris; 1118 ir_intfnum = mceusb_model[model].ir_intfnum;
1114 1119
1115 if (is_polaris) { 1120 /* There are multi-function devices with non-IR interfaces */
1116 /* Interface 0 is IR */ 1121 if (idesc->desc.bInterfaceNumber != ir_intfnum)
1117 if (idesc->desc.bInterfaceNumber) 1122 return -ENODEV;
1118 return -ENODEV;
1119 }
1120 1123
1121 /* step through the endpoints to find first bulk in and out endpoint */ 1124 /* step through the endpoints to find first bulk in and out endpoint */
1122 for (i = 0; i < idesc->desc.bNumEndpoints; ++i) { 1125 for (i = 0; i < idesc->desc.bNumEndpoints; ++i) {
diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c
index 273d9d674792..d4d64492a057 100644
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -385,8 +385,9 @@ static void nvt_cir_regs_init(struct nvt_dev *nvt)
385 385
386static void nvt_cir_wake_regs_init(struct nvt_dev *nvt) 386static void nvt_cir_wake_regs_init(struct nvt_dev *nvt)
387{ 387{
388 /* set number of bytes needed for wake key comparison (default 67) */ 388 /* set number of bytes needed for wake from s3 (default 65) */
389 nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_LEN, CIR_WAKE_FIFO_CMP_DEEP); 389 nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_CMP_BYTES,
390 CIR_WAKE_FIFO_CMP_DEEP);
390 391
391 /* set tolerance/variance allowed per byte during wake compare */ 392 /* set tolerance/variance allowed per byte during wake compare */
392 nvt_cir_wake_reg_write(nvt, CIR_WAKE_CMP_TOLERANCE, 393 nvt_cir_wake_reg_write(nvt, CIR_WAKE_CMP_TOLERANCE,
diff --git a/drivers/media/rc/nuvoton-cir.h b/drivers/media/rc/nuvoton-cir.h
index 1df82351cb03..048135eea702 100644
--- a/drivers/media/rc/nuvoton-cir.h
+++ b/drivers/media/rc/nuvoton-cir.h
@@ -305,8 +305,11 @@ struct nvt_dev {
305#define CIR_WAKE_IRFIFOSTS_RX_EMPTY 0x20 305#define CIR_WAKE_IRFIFOSTS_RX_EMPTY 0x20
306#define CIR_WAKE_IRFIFOSTS_RX_FULL 0x10 306#define CIR_WAKE_IRFIFOSTS_RX_FULL 0x10
307 307
308/* CIR Wake FIFO buffer is 67 bytes long */ 308/*
309#define CIR_WAKE_FIFO_LEN 67 309 * The CIR Wake FIFO buffer is 67 bytes long, but the stock remote wakes
310 * the system comparing only 65 bytes (fails with this set to 67)
311 */
312#define CIR_WAKE_FIFO_CMP_BYTES 65
310/* CIR Wake byte comparison tolerance */ 313/* CIR Wake byte comparison tolerance */
311#define CIR_WAKE_CMP_TOLERANCE 5 314#define CIR_WAKE_CMP_TOLERANCE 5
312 315
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 512a2f4ada0e..5b4422ef4e6d 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -850,7 +850,7 @@ static ssize_t store_protocols(struct device *device,
850 count++; 850 count++;
851 } else { 851 } else {
852 for (i = 0; i < ARRAY_SIZE(proto_names); i++) { 852 for (i = 0; i < ARRAY_SIZE(proto_names); i++) {
853 if (!strncasecmp(tmp, proto_names[i].name, strlen(proto_names[i].name))) { 853 if (!strcasecmp(tmp, proto_names[i].name)) {
854 tmp += strlen(proto_names[i].name); 854 tmp += strlen(proto_names[i].name);
855 mask = proto_names[i].type; 855 mask = proto_names[i].type;
856 break; 856 break;
diff --git a/drivers/media/video/au0828/au0828-video.c b/drivers/media/video/au0828/au0828-video.c
index e41e4ad5cc40..9c475c600fc9 100644
--- a/drivers/media/video/au0828/au0828-video.c
+++ b/drivers/media/video/au0828/au0828-video.c
@@ -1758,7 +1758,12 @@ static int vidioc_reqbufs(struct file *file, void *priv,
1758 if (rc < 0) 1758 if (rc < 0)
1759 return rc; 1759 return rc;
1760 1760
1761 return videobuf_reqbufs(&fh->vb_vidq, rb); 1761 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1762 rc = videobuf_reqbufs(&fh->vb_vidq, rb);
1763 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1764 rc = videobuf_reqbufs(&fh->vb_vbiq, rb);
1765
1766 return rc;
1762} 1767}
1763 1768
1764static int vidioc_querybuf(struct file *file, void *priv, 1769static int vidioc_querybuf(struct file *file, void *priv,
@@ -1772,7 +1777,12 @@ static int vidioc_querybuf(struct file *file, void *priv,
1772 if (rc < 0) 1777 if (rc < 0)
1773 return rc; 1778 return rc;
1774 1779
1775 return videobuf_querybuf(&fh->vb_vidq, b); 1780 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1781 rc = videobuf_querybuf(&fh->vb_vidq, b);
1782 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1783 rc = videobuf_querybuf(&fh->vb_vbiq, b);
1784
1785 return rc;
1776} 1786}
1777 1787
1778static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b) 1788static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1785,7 +1795,12 @@ static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
1785 if (rc < 0) 1795 if (rc < 0)
1786 return rc; 1796 return rc;
1787 1797
1788 return videobuf_qbuf(&fh->vb_vidq, b); 1798 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1799 rc = videobuf_qbuf(&fh->vb_vidq, b);
1800 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1801 rc = videobuf_qbuf(&fh->vb_vbiq, b);
1802
1803 return rc;
1789} 1804}
1790 1805
1791static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b) 1806static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1806,7 +1821,12 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
1806 dev->greenscreen_detected = 0; 1821 dev->greenscreen_detected = 0;
1807 } 1822 }
1808 1823
1809 return videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK); 1824 if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
1825 rc = videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
1826 else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
1827 rc = videobuf_dqbuf(&fh->vb_vbiq, b, file->f_flags & O_NONBLOCK);
1828
1829 return rc;
1810} 1830}
1811 1831
1812static struct v4l2_file_operations au0828_v4l_fops = { 1832static struct v4l2_file_operations au0828_v4l_fops = {
diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c
index 87177733cf92..68ad1963f421 100644
--- a/drivers/media/video/cx18/cx18-cards.c
+++ b/drivers/media/video/cx18/cx18-cards.c
@@ -95,6 +95,53 @@ static const struct cx18_card cx18_card_hvr1600_esmt = {
95 .i2c = &cx18_i2c_std, 95 .i2c = &cx18_i2c_std,
96}; 96};
97 97
98static const struct cx18_card cx18_card_hvr1600_s5h1411 = {
99 .type = CX18_CARD_HVR_1600_S5H1411,
100 .name = "Hauppauge HVR-1600",
101 .comment = "Simultaneous Digital and Analog TV capture supported\n",
102 .v4l2_capabilities = CX18_CAP_ENCODER,
103 .hw_audio_ctrl = CX18_HW_418_AV,
104 .hw_muxer = CX18_HW_CS5345,
105 .hw_all = CX18_HW_TVEEPROM | CX18_HW_418_AV | CX18_HW_TUNER |
106 CX18_HW_CS5345 | CX18_HW_DVB | CX18_HW_GPIO_RESET_CTRL |
107 CX18_HW_Z8F0811_IR_HAUP,
108 .video_inputs = {
109 { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE7 },
110 { CX18_CARD_INPUT_SVIDEO1, 1, CX18_AV_SVIDEO1 },
111 { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE3 },
112 { CX18_CARD_INPUT_SVIDEO2, 2, CX18_AV_SVIDEO2 },
113 { CX18_CARD_INPUT_COMPOSITE2, 2, CX18_AV_COMPOSITE4 },
114 },
115 .audio_inputs = {
116 { CX18_CARD_INPUT_AUD_TUNER,
117 CX18_AV_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 },
118 { CX18_CARD_INPUT_LINE_IN1,
119 CX18_AV_AUDIO_SERIAL1, CS5345_IN_2 },
120 { CX18_CARD_INPUT_LINE_IN2,
121 CX18_AV_AUDIO_SERIAL1, CS5345_IN_3 },
122 },
123 .radio_input = { CX18_CARD_INPUT_AUD_TUNER,
124 CX18_AV_AUDIO_SERIAL1, CS5345_IN_4 },
125 .ddr = {
126 /* ESMT M13S128324A-5B memory */
127 .chip_config = 0x003,
128 .refresh = 0x30c,
129 .timing1 = 0x44220e82,
130 .timing2 = 0x08,
131 .tune_lane = 0,
132 .initial_emrs = 0,
133 },
134 .gpio_init.initial_value = 0x3001,
135 .gpio_init.direction = 0x3001,
136 .gpio_i2c_slave_reset = {
137 .active_lo_mask = 0x3001,
138 .msecs_asserted = 10,
139 .msecs_recovery = 40,
140 .ir_reset_mask = 0x0001,
141 },
142 .i2c = &cx18_i2c_std,
143};
144
98static const struct cx18_card cx18_card_hvr1600_samsung = { 145static const struct cx18_card cx18_card_hvr1600_samsung = {
99 .type = CX18_CARD_HVR_1600_SAMSUNG, 146 .type = CX18_CARD_HVR_1600_SAMSUNG,
100 .name = "Hauppauge HVR-1600 (Preproduction)", 147 .name = "Hauppauge HVR-1600 (Preproduction)",
@@ -523,7 +570,8 @@ static const struct cx18_card *cx18_card_list[] = {
523 &cx18_card_toshiba_qosmio_dvbt, 570 &cx18_card_toshiba_qosmio_dvbt,
524 &cx18_card_leadtek_pvr2100, 571 &cx18_card_leadtek_pvr2100,
525 &cx18_card_leadtek_dvr3100h, 572 &cx18_card_leadtek_dvr3100h,
526 &cx18_card_gotview_dvd3 573 &cx18_card_gotview_dvd3,
574 &cx18_card_hvr1600_s5h1411
527}; 575};
528 576
529const struct cx18_card *cx18_get_card(u16 index) 577const struct cx18_card *cx18_get_card(u16 index)
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index 944af8adbe0c..b1c3cbd92743 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -157,6 +157,7 @@ MODULE_PARM_DESC(cardtype,
157 "\t\t\t 7 = Leadtek WinFast PVR2100\n" 157 "\t\t\t 7 = Leadtek WinFast PVR2100\n"
158 "\t\t\t 8 = Leadtek WinFast DVR3100 H\n" 158 "\t\t\t 8 = Leadtek WinFast DVR3100 H\n"
159 "\t\t\t 9 = GoTView PCI DVD3 Hybrid\n" 159 "\t\t\t 9 = GoTView PCI DVD3 Hybrid\n"
160 "\t\t\t 10 = Hauppauge HVR 1600 (S5H1411)\n"
160 "\t\t\t 0 = Autodetect (default)\n" 161 "\t\t\t 0 = Autodetect (default)\n"
161 "\t\t\t-1 = Ignore this card\n\t\t"); 162 "\t\t\t-1 = Ignore this card\n\t\t");
162MODULE_PARM_DESC(pal, "Set PAL standard: B, G, H, D, K, I, M, N, Nc, 60"); 163MODULE_PARM_DESC(pal, "Set PAL standard: B, G, H, D, K, I, M, N, Nc, 60");
@@ -337,6 +338,7 @@ void cx18_read_eeprom(struct cx18 *cx, struct tveeprom *tv)
337 switch (cx->card->type) { 338 switch (cx->card->type) {
338 case CX18_CARD_HVR_1600_ESMT: 339 case CX18_CARD_HVR_1600_ESMT:
339 case CX18_CARD_HVR_1600_SAMSUNG: 340 case CX18_CARD_HVR_1600_SAMSUNG:
341 case CX18_CARD_HVR_1600_S5H1411:
340 tveeprom_hauppauge_analog(&c, tv, eedata); 342 tveeprom_hauppauge_analog(&c, tv, eedata);
341 break; 343 break;
342 case CX18_CARD_YUAN_MPC718: 344 case CX18_CARD_YUAN_MPC718:
@@ -365,7 +367,25 @@ static void cx18_process_eeprom(struct cx18 *cx)
365 from the model number. Use the cardtype module option if you 367 from the model number. Use the cardtype module option if you
366 have one of these preproduction models. */ 368 have one of these preproduction models. */
367 switch (tv.model) { 369 switch (tv.model) {
368 case 74000 ... 74999: 370 case 74301: /* Retail models */
371 case 74321:
372 case 74351: /* OEM models */
373 case 74361:
374 /* Digital side is s5h1411/tda18271 */
375 cx->card = cx18_get_card(CX18_CARD_HVR_1600_S5H1411);
376 break;
377 case 74021: /* Retail models */
378 case 74031:
379 case 74041:
380 case 74141:
381 case 74541: /* OEM models */
382 case 74551:
383 case 74591:
384 case 74651:
385 case 74691:
386 case 74751:
387 case 74891:
388 /* Digital side is s5h1409/mxl5005s */
369 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT); 389 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
370 break; 390 break;
371 case 0x718: 391 case 0x718:
@@ -377,7 +397,8 @@ static void cx18_process_eeprom(struct cx18 *cx)
377 CX18_ERR("Invalid EEPROM\n"); 397 CX18_ERR("Invalid EEPROM\n");
378 return; 398 return;
379 default: 399 default:
380 CX18_ERR("Unknown model %d, defaulting to HVR-1600\n", tv.model); 400 CX18_ERR("Unknown model %d, defaulting to original HVR-1600 "
401 "(cardtype=1)\n", tv.model);
381 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT); 402 cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
382 break; 403 break;
383 } 404 }
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index 306caac6d3fc..f736679d2517 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -85,7 +85,8 @@
85#define CX18_CARD_LEADTEK_PVR2100 6 /* Leadtek WinFast PVR2100 */ 85#define CX18_CARD_LEADTEK_PVR2100 6 /* Leadtek WinFast PVR2100 */
86#define CX18_CARD_LEADTEK_DVR3100H 7 /* Leadtek WinFast DVR3100 H */ 86#define CX18_CARD_LEADTEK_DVR3100H 7 /* Leadtek WinFast DVR3100 H */
87#define CX18_CARD_GOTVIEW_PCI_DVD3 8 /* GoTView PCI DVD3 Hybrid */ 87#define CX18_CARD_GOTVIEW_PCI_DVD3 8 /* GoTView PCI DVD3 Hybrid */
88#define CX18_CARD_LAST 8 88#define CX18_CARD_HVR_1600_S5H1411 9 /* Hauppauge HVR 1600 s5h1411/tda18271*/
89#define CX18_CARD_LAST 9
89 90
90#define CX18_ENC_STREAM_TYPE_MPG 0 91#define CX18_ENC_STREAM_TYPE_MPG 0
91#define CX18_ENC_STREAM_TYPE_TS 1 92#define CX18_ENC_STREAM_TYPE_TS 1
diff --git a/drivers/media/video/cx18/cx18-dvb.c b/drivers/media/video/cx18/cx18-dvb.c
index f0381d62518d..f41922bd4020 100644
--- a/drivers/media/video/cx18/cx18-dvb.c
+++ b/drivers/media/video/cx18/cx18-dvb.c
@@ -29,6 +29,8 @@
29#include "cx18-gpio.h" 29#include "cx18-gpio.h"
30#include "s5h1409.h" 30#include "s5h1409.h"
31#include "mxl5005s.h" 31#include "mxl5005s.h"
32#include "s5h1411.h"
33#include "tda18271.h"
32#include "zl10353.h" 34#include "zl10353.h"
33 35
34#include <linux/firmware.h> 36#include <linux/firmware.h>
@@ -77,6 +79,32 @@ static struct s5h1409_config hauppauge_hvr1600_config = {
77}; 79};
78 80
79/* 81/*
82 * CX18_CARD_HVR_1600_S5H1411
83 */
84static struct s5h1411_config hcw_s5h1411_config = {
85 .output_mode = S5H1411_SERIAL_OUTPUT,
86 .gpio = S5H1411_GPIO_OFF,
87 .vsb_if = S5H1411_IF_44000,
88 .qam_if = S5H1411_IF_4000,
89 .inversion = S5H1411_INVERSION_ON,
90 .status_mode = S5H1411_DEMODLOCKING,
91 .mpeg_timing = S5H1411_MPEGTIMING_CONTINOUS_NONINVERTING_CLOCK,
92};
93
94static struct tda18271_std_map hauppauge_tda18271_std_map = {
95 .atsc_6 = { .if_freq = 5380, .agc_mode = 3, .std = 3,
96 .if_lvl = 6, .rfagc_top = 0x37 },
97 .qam_6 = { .if_freq = 4000, .agc_mode = 3, .std = 0,
98 .if_lvl = 6, .rfagc_top = 0x37 },
99};
100
101static struct tda18271_config hauppauge_tda18271_config = {
102 .std_map = &hauppauge_tda18271_std_map,
103 .gate = TDA18271_GATE_DIGITAL,
104 .output_opt = TDA18271_OUTPUT_LT_OFF,
105};
106
107/*
80 * CX18_CARD_LEADTEK_DVR3100H 108 * CX18_CARD_LEADTEK_DVR3100H
81 */ 109 */
82/* Information/confirmation of proper config values provided by Terry Wu */ 110/* Information/confirmation of proper config values provided by Terry Wu */
@@ -244,6 +272,7 @@ static int cx18_dvb_start_feed(struct dvb_demux_feed *feed)
244 switch (cx->card->type) { 272 switch (cx->card->type) {
245 case CX18_CARD_HVR_1600_ESMT: 273 case CX18_CARD_HVR_1600_ESMT:
246 case CX18_CARD_HVR_1600_SAMSUNG: 274 case CX18_CARD_HVR_1600_SAMSUNG:
275 case CX18_CARD_HVR_1600_S5H1411:
247 v = cx18_read_reg(cx, CX18_REG_DMUX_NUM_PORT_0_CONTROL); 276 v = cx18_read_reg(cx, CX18_REG_DMUX_NUM_PORT_0_CONTROL);
248 v |= 0x00400000; /* Serial Mode */ 277 v |= 0x00400000; /* Serial Mode */
249 v |= 0x00002000; /* Data Length - Byte */ 278 v |= 0x00002000; /* Data Length - Byte */
@@ -455,6 +484,15 @@ static int dvb_register(struct cx18_stream *stream)
455 ret = 0; 484 ret = 0;
456 } 485 }
457 break; 486 break;
487 case CX18_CARD_HVR_1600_S5H1411:
488 dvb->fe = dvb_attach(s5h1411_attach,
489 &hcw_s5h1411_config,
490 &cx->i2c_adap[0]);
491 if (dvb->fe != NULL)
492 dvb_attach(tda18271_attach, dvb->fe,
493 0x60, &cx->i2c_adap[0],
494 &hauppauge_tda18271_config);
495 break;
458 case CX18_CARD_LEADTEK_DVR3100H: 496 case CX18_CARD_LEADTEK_DVR3100H:
459 dvb->fe = dvb_attach(zl10353_attach, 497 dvb->fe = dvb_attach(zl10353_attach,
460 &leadtek_dvr3100h_demod, 498 &leadtek_dvr3100h_demod,
diff --git a/drivers/media/video/cx23885/cx23885-i2c.c b/drivers/media/video/cx23885/cx23885-i2c.c
index ed3d8f55029b..307ff543c254 100644
--- a/drivers/media/video/cx23885/cx23885-i2c.c
+++ b/drivers/media/video/cx23885/cx23885-i2c.c
@@ -122,10 +122,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
122 122
123 if (!i2c_wait_done(i2c_adap)) 123 if (!i2c_wait_done(i2c_adap))
124 goto eio; 124 goto eio;
125 if (!i2c_slave_did_ack(i2c_adap)) {
126 retval = -ENXIO;
127 goto err;
128 }
129 if (i2c_debug) { 125 if (i2c_debug) {
130 printk(" <W %02x %02x", msg->addr << 1, msg->buf[0]); 126 printk(" <W %02x %02x", msg->addr << 1, msg->buf[0]);
131 if (!(ctrl & I2C_NOSTOP)) 127 if (!(ctrl & I2C_NOSTOP))
@@ -158,7 +154,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
158 154
159 eio: 155 eio:
160 retval = -EIO; 156 retval = -EIO;
161 err:
162 if (i2c_debug) 157 if (i2c_debug)
163 printk(KERN_ERR " ERR: %d\n", retval); 158 printk(KERN_ERR " ERR: %d\n", retval);
164 return retval; 159 return retval;
@@ -209,10 +204,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
209 204
210 if (!i2c_wait_done(i2c_adap)) 205 if (!i2c_wait_done(i2c_adap))
211 goto eio; 206 goto eio;
212 if (cnt == 0 && !i2c_slave_did_ack(i2c_adap)) {
213 retval = -ENXIO;
214 goto err;
215 }
216 msg->buf[cnt] = cx_read(bus->reg_rdata) & 0xff; 207 msg->buf[cnt] = cx_read(bus->reg_rdata) & 0xff;
217 if (i2c_debug) { 208 if (i2c_debug) {
218 dprintk(1, " %02x", msg->buf[cnt]); 209 dprintk(1, " %02x", msg->buf[cnt]);
@@ -224,7 +215,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
224 215
225 eio: 216 eio:
226 retval = -EIO; 217 retval = -EIO;
227 err:
228 if (i2c_debug) 218 if (i2c_debug)
229 printk(KERN_ERR " ERR: %d\n", retval); 219 printk(KERN_ERR " ERR: %d\n", retval);
230 return retval; 220 return retval;
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index 6fc09dd41b9d..35796e035247 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -2015,7 +2015,8 @@ static int cx25840_probe(struct i2c_client *client,
2015 kfree(state); 2015 kfree(state);
2016 return err; 2016 return err;
2017 } 2017 }
2018 v4l2_ctrl_cluster(2, &state->volume); 2018 if (!is_cx2583x(state))
2019 v4l2_ctrl_cluster(2, &state->volume);
2019 v4l2_ctrl_handler_setup(&state->hdl); 2020 v4l2_ctrl_handler_setup(&state->hdl);
2020 2021
2021 if (client->dev.platform_data) { 2022 if (client->dev.platform_data) {
diff --git a/drivers/media/video/ivtv/ivtv-irq.c b/drivers/media/video/ivtv/ivtv-irq.c
index 9b4faf009196..9c29e964d400 100644
--- a/drivers/media/video/ivtv/ivtv-irq.c
+++ b/drivers/media/video/ivtv/ivtv-irq.c
@@ -628,22 +628,66 @@ static void ivtv_irq_enc_pio_complete(struct ivtv *itv)
628static void ivtv_irq_dma_err(struct ivtv *itv) 628static void ivtv_irq_dma_err(struct ivtv *itv)
629{ 629{
630 u32 data[CX2341X_MBOX_MAX_DATA]; 630 u32 data[CX2341X_MBOX_MAX_DATA];
631 u32 status;
631 632
632 del_timer(&itv->dma_timer); 633 del_timer(&itv->dma_timer);
634
633 ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data); 635 ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data);
636 status = read_reg(IVTV_REG_DMASTATUS);
634 IVTV_DEBUG_WARN("DMA ERROR %08x %08x %08x %d\n", data[0], data[1], 637 IVTV_DEBUG_WARN("DMA ERROR %08x %08x %08x %d\n", data[0], data[1],
635 read_reg(IVTV_REG_DMASTATUS), itv->cur_dma_stream); 638 status, itv->cur_dma_stream);
636 write_reg(read_reg(IVTV_REG_DMASTATUS) & 3, IVTV_REG_DMASTATUS); 639 /*
640 * We do *not* write back to the IVTV_REG_DMASTATUS register to
641 * clear the error status, if either the encoder write (0x02) or
642 * decoder read (0x01) bus master DMA operation do not indicate
643 * completed. We can race with the DMA engine, which may have
644 * transitioned to completed status *after* we read the register.
645 * Setting a IVTV_REG_DMASTATUS flag back to "busy" status, after the
646 * DMA engine has completed, will cause the DMA engine to stop working.
647 */
648 status &= 0x3;
649 if (status == 0x3)
650 write_reg(status, IVTV_REG_DMASTATUS);
651
637 if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) && 652 if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) &&
638 itv->cur_dma_stream >= 0 && itv->cur_dma_stream < IVTV_MAX_STREAMS) { 653 itv->cur_dma_stream >= 0 && itv->cur_dma_stream < IVTV_MAX_STREAMS) {
639 struct ivtv_stream *s = &itv->streams[itv->cur_dma_stream]; 654 struct ivtv_stream *s = &itv->streams[itv->cur_dma_stream];
640 655
641 /* retry */ 656 if (s->type >= IVTV_DEC_STREAM_TYPE_MPG) {
642 if (s->type >= IVTV_DEC_STREAM_TYPE_MPG) 657 /* retry */
658 /*
659 * FIXME - handle cases of DMA error similar to
660 * encoder below, except conditioned on status & 0x1
661 */
643 ivtv_dma_dec_start(s); 662 ivtv_dma_dec_start(s);
644 else 663 return;
645 ivtv_dma_enc_start(s); 664 } else {
646 return; 665 if ((status & 0x2) == 0) {
666 /*
667 * CX2341x Bus Master DMA write is ongoing.
668 * Reset the timer and let it complete.
669 */
670 itv->dma_timer.expires =
671 jiffies + msecs_to_jiffies(600);
672 add_timer(&itv->dma_timer);
673 return;
674 }
675
676 if (itv->dma_retries < 3) {
677 /*
678 * CX2341x Bus Master DMA write has ended.
679 * Retry the write, starting with the first
680 * xfer segment. Just retrying the current
681 * segment is not sufficient.
682 */
683 s->sg_processed = 0;
684 itv->dma_retries++;
685 ivtv_dma_enc_start_xfer(s);
686 return;
687 }
688 /* Too many retries, give up on this one */
689 }
690
647 } 691 }
648 if (test_bit(IVTV_F_I_UDMA, &itv->i_flags)) { 692 if (test_bit(IVTV_F_I_UDMA, &itv->i_flags)) {
649 ivtv_udma_start(itv); 693 ivtv_udma_start(itv);
diff --git a/drivers/media/video/mem2mem_testdev.c b/drivers/media/video/mem2mem_testdev.c
index c179041d91f8..e7e717800ee2 100644
--- a/drivers/media/video/mem2mem_testdev.c
+++ b/drivers/media/video/mem2mem_testdev.c
@@ -1011,7 +1011,6 @@ static int m2mtest_remove(struct platform_device *pdev)
1011 v4l2_m2m_release(dev->m2m_dev); 1011 v4l2_m2m_release(dev->m2m_dev);
1012 del_timer_sync(&dev->timer); 1012 del_timer_sync(&dev->timer);
1013 video_unregister_device(dev->vfd); 1013 video_unregister_device(dev->vfd);
1014 video_device_release(dev->vfd);
1015 v4l2_device_unregister(&dev->v4l2_dev); 1014 v4l2_device_unregister(&dev->v4l2_dev);
1016 kfree(dev); 1015 kfree(dev);
1017 1016
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index b63f8cafa671..561909b65ce6 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -57,7 +57,7 @@
57#include <linux/usb.h> 57#include <linux/usb.h>
58 58
59#define S2255_MAJOR_VERSION 1 59#define S2255_MAJOR_VERSION 1
60#define S2255_MINOR_VERSION 20 60#define S2255_MINOR_VERSION 21
61#define S2255_RELEASE 0 61#define S2255_RELEASE 0
62#define S2255_VERSION KERNEL_VERSION(S2255_MAJOR_VERSION, \ 62#define S2255_VERSION KERNEL_VERSION(S2255_MAJOR_VERSION, \
63 S2255_MINOR_VERSION, \ 63 S2255_MINOR_VERSION, \
@@ -312,9 +312,9 @@ struct s2255_fh {
312}; 312};
313 313
314/* current cypress EEPROM firmware version */ 314/* current cypress EEPROM firmware version */
315#define S2255_CUR_USB_FWVER ((3 << 8) | 6) 315#define S2255_CUR_USB_FWVER ((3 << 8) | 11)
316/* current DSP FW version */ 316/* current DSP FW version */
317#define S2255_CUR_DSP_FWVER 8 317#define S2255_CUR_DSP_FWVER 10102
318/* Need DSP version 5+ for video status feature */ 318/* Need DSP version 5+ for video status feature */
319#define S2255_MIN_DSP_STATUS 5 319#define S2255_MIN_DSP_STATUS 5
320#define S2255_MIN_DSP_COLORFILTER 8 320#define S2255_MIN_DSP_COLORFILTER 8
@@ -492,9 +492,11 @@ static void planar422p_to_yuv_packed(const unsigned char *in,
492 492
493static void s2255_reset_dsppower(struct s2255_dev *dev) 493static void s2255_reset_dsppower(struct s2255_dev *dev)
494{ 494{
495 s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b0b, NULL, 0, 1); 495 s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b01, NULL, 0, 1);
496 msleep(10); 496 msleep(10);
497 s2255_vendor_req(dev, 0x50, 0x0000, 0x0000, NULL, 0, 1); 497 s2255_vendor_req(dev, 0x50, 0x0000, 0x0000, NULL, 0, 1);
498 msleep(600);
499 s2255_vendor_req(dev, 0x10, 0x0000, 0x0000, NULL, 0, 1);
498 return; 500 return;
499} 501}
500 502
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 6a1f94042612..c45e6305b26f 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -143,9 +143,9 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
143 unsigned long flags; 143 unsigned long flags;
144 struct asic3 *asic; 144 struct asic3 *asic;
145 145
146 desc->chip->ack(irq); 146 desc->irq_data.chip->irq_ack(&desc->irq_data);
147 147
148 asic = desc->handler_data; 148 asic = get_irq_data(irq);
149 149
150 for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) { 150 for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) {
151 u32 status; 151 u32 status;
diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
index 33c923d215c7..fdd8a1b8bc67 100644
--- a/drivers/mfd/davinci_voicecodec.c
+++ b/drivers/mfd/davinci_voicecodec.c
@@ -118,12 +118,12 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
118 118
119 /* Voice codec interface client */ 119 /* Voice codec interface client */
120 cell = &davinci_vc->cells[DAVINCI_VC_VCIF_CELL]; 120 cell = &davinci_vc->cells[DAVINCI_VC_VCIF_CELL];
121 cell->name = "davinci_vcif"; 121 cell->name = "davinci-vcif";
122 cell->driver_data = davinci_vc; 122 cell->driver_data = davinci_vc;
123 123
124 /* Voice codec CQ93VC client */ 124 /* Voice codec CQ93VC client */
125 cell = &davinci_vc->cells[DAVINCI_VC_CQ93VC_CELL]; 125 cell = &davinci_vc->cells[DAVINCI_VC_CQ93VC_CELL];
126 cell->name = "cq93vc"; 126 cell->name = "cq93vc-codec";
127 cell->driver_data = davinci_vc; 127 cell->driver_data = davinci_vc;
128 128
129 ret = mfd_add_devices(&pdev->dev, pdev->id, davinci_vc->cells, 129 ret = mfd_add_devices(&pdev->dev, pdev->id, davinci_vc->cells,
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 627cf577b16d..e9018d1394ee 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -150,12 +150,12 @@ static inline int __tps6586x_write(struct i2c_client *client,
150static inline int __tps6586x_writes(struct i2c_client *client, int reg, 150static inline int __tps6586x_writes(struct i2c_client *client, int reg,
151 int len, uint8_t *val) 151 int len, uint8_t *val)
152{ 152{
153 int ret; 153 int ret, i;
154 154
155 ret = i2c_smbus_write_i2c_block_data(client, reg, len, val); 155 for (i = 0; i < len; i++) {
156 if (ret < 0) { 156 ret = __tps6586x_write(client, reg + i, *(val + i));
157 dev_err(&client->dev, "failed writings to 0x%02x\n", reg); 157 if (ret < 0)
158 return ret; 158 return ret;
159 } 159 }
160 160
161 return 0; 161 return 0;
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 000cb414a78a..92b85e28a15e 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -385,12 +385,18 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev)
385 idev->close = ucb1x00_ts_close; 385 idev->close = ucb1x00_ts_close;
386 386
387 __set_bit(EV_ABS, idev->evbit); 387 __set_bit(EV_ABS, idev->evbit);
388 __set_bit(ABS_X, idev->absbit);
389 __set_bit(ABS_Y, idev->absbit);
390 __set_bit(ABS_PRESSURE, idev->absbit);
391 388
392 input_set_drvdata(idev, ts); 389 input_set_drvdata(idev, ts);
393 390
391 ucb1x00_adc_enable(ts->ucb);
392 ts->x_res = ucb1x00_ts_read_xres(ts);
393 ts->y_res = ucb1x00_ts_read_yres(ts);
394 ucb1x00_adc_disable(ts->ucb);
395
396 input_set_abs_params(idev, ABS_X, 0, ts->x_res, 0, 0);
397 input_set_abs_params(idev, ABS_Y, 0, ts->y_res, 0, 0);
398 input_set_abs_params(idev, ABS_PRESSURE, 0, 0, 0, 0);
399
394 err = input_register_device(idev); 400 err = input_register_device(idev);
395 if (err) 401 if (err)
396 goto fail; 402 goto fail;
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 41233c7fa581..f4016a075fd6 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -246,6 +246,16 @@ static int wm8994_suspend(struct device *dev)
246 struct wm8994 *wm8994 = dev_get_drvdata(dev); 246 struct wm8994 *wm8994 = dev_get_drvdata(dev);
247 int ret; 247 int ret;
248 248
249 /* Don't actually go through with the suspend if the CODEC is
250 * still active (eg, for audio passthrough from CP. */
251 ret = wm8994_reg_read(wm8994, WM8994_POWER_MANAGEMENT_1);
252 if (ret < 0) {
253 dev_err(dev, "Failed to read power status: %d\n", ret);
254 } else if (ret & WM8994_VMID_SEL_MASK) {
255 dev_dbg(dev, "CODEC still active, ignoring suspend\n");
256 return 0;
257 }
258
249 /* GPIO configuration state is saved here since we may be configuring 259 /* GPIO configuration state is saved here since we may be configuring
250 * the GPIO alternate functions even if we're not using the gpiolib 260 * the GPIO alternate functions even if we're not using the gpiolib
251 * driver for them. 261 * driver for them.
@@ -261,6 +271,8 @@ static int wm8994_suspend(struct device *dev)
261 if (ret < 0) 271 if (ret < 0)
262 dev_err(dev, "Failed to save LDO registers: %d\n", ret); 272 dev_err(dev, "Failed to save LDO registers: %d\n", ret);
263 273
274 wm8994->suspended = true;
275
264 ret = regulator_bulk_disable(wm8994->num_supplies, 276 ret = regulator_bulk_disable(wm8994->num_supplies,
265 wm8994->supplies); 277 wm8994->supplies);
266 if (ret != 0) { 278 if (ret != 0) {
@@ -276,6 +288,10 @@ static int wm8994_resume(struct device *dev)
276 struct wm8994 *wm8994 = dev_get_drvdata(dev); 288 struct wm8994 *wm8994 = dev_get_drvdata(dev);
277 int ret; 289 int ret;
278 290
291 /* We may have lied to the PM core about suspending */
292 if (!wm8994->suspended)
293 return 0;
294
279 ret = regulator_bulk_enable(wm8994->num_supplies, 295 ret = regulator_bulk_enable(wm8994->num_supplies,
280 wm8994->supplies); 296 wm8994->supplies);
281 if (ret != 0) { 297 if (ret != 0) {
@@ -298,6 +314,8 @@ static int wm8994_resume(struct device *dev)
298 if (ret < 0) 314 if (ret < 0)
299 dev_err(dev, "Failed to restore GPIO registers: %d\n", ret); 315 dev_err(dev, "Failed to restore GPIO registers: %d\n", ret);
300 316
317 wm8994->suspended = false;
318
301 return 0; 319 return 0;
302} 320}
303#endif 321#endif
diff --git a/drivers/misc/bmp085.c b/drivers/misc/bmp085.c
index 63ee4c1a5315..b6e1c9a6679e 100644
--- a/drivers/misc/bmp085.c
+++ b/drivers/misc/bmp085.c
@@ -449,6 +449,7 @@ static const struct i2c_device_id bmp085_id[] = {
449 { "bmp085", 0 }, 449 { "bmp085", 0 },
450 { } 450 { }
451}; 451};
452MODULE_DEVICE_TABLE(i2c, bmp085_id);
452 453
453static struct i2c_driver bmp085_driver = { 454static struct i2c_driver bmp085_driver = {
454 .driver = { 455 .driver = {
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 6625c057be05..150b5f3cd401 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1529,7 +1529,7 @@ void mmc_rescan(struct work_struct *work)
1529 * still present 1529 * still present
1530 */ 1530 */
1531 if (host->bus_ops && host->bus_ops->detect && !host->bus_dead 1531 if (host->bus_ops && host->bus_ops->detect && !host->bus_dead
1532 && mmc_card_is_removable(host)) 1532 && !(host->caps & MMC_CAP_NONREMOVABLE))
1533 host->bus_ops->detect(host); 1533 host->bus_ops->detect(host);
1534 1534
1535 /* 1535 /*
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 5c4a54d9b6a4..ebc62ad4cc56 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -792,7 +792,6 @@ int mmc_attach_sdio(struct mmc_host *host)
792 */ 792 */
793 mmc_release_host(host); 793 mmc_release_host(host);
794 err = mmc_add_card(host->card); 794 err = mmc_add_card(host->card);
795 mmc_claim_host(host);
796 if (err) 795 if (err)
797 goto remove_added; 796 goto remove_added;
798 797
@@ -805,12 +804,12 @@ int mmc_attach_sdio(struct mmc_host *host)
805 goto remove_added; 804 goto remove_added;
806 } 805 }
807 806
807 mmc_claim_host(host);
808 return 0; 808 return 0;
809 809
810 810
811remove_added: 811remove_added:
812 /* Remove without lock if the device has been added. */ 812 /* Remove without lock if the device has been added. */
813 mmc_release_host(host);
814 mmc_sdio_remove(host); 813 mmc_sdio_remove(host);
815 mmc_claim_host(host); 814 mmc_claim_host(host);
816remove: 815remove:
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index a8c3e1c9b02a..4aaa88f8ab5f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1230,10 +1230,32 @@ static int inval_cache_and_wait_for_operation(
1230 sleep_time = chip_op_time / 2; 1230 sleep_time = chip_op_time / 2;
1231 1231
1232 for (;;) { 1232 for (;;) {
1233 if (chip->state != chip_state) {
1234 /* Someone's suspended the operation: sleep */
1235 DECLARE_WAITQUEUE(wait, current);
1236 set_current_state(TASK_UNINTERRUPTIBLE);
1237 add_wait_queue(&chip->wq, &wait);
1238 mutex_unlock(&chip->mutex);
1239 schedule();
1240 remove_wait_queue(&chip->wq, &wait);
1241 mutex_lock(&chip->mutex);
1242 continue;
1243 }
1244
1233 status = map_read(map, cmd_adr); 1245 status = map_read(map, cmd_adr);
1234 if (map_word_andequal(map, status, status_OK, status_OK)) 1246 if (map_word_andequal(map, status, status_OK, status_OK))
1235 break; 1247 break;
1236 1248
1249 if (chip->erase_suspended && chip_state == FL_ERASING) {
1250 /* Erase suspend occured while sleep: reset timeout */
1251 timeo = reset_timeo;
1252 chip->erase_suspended = 0;
1253 }
1254 if (chip->write_suspended && chip_state == FL_WRITING) {
1255 /* Write suspend occured while sleep: reset timeout */
1256 timeo = reset_timeo;
1257 chip->write_suspended = 0;
1258 }
1237 if (!timeo) { 1259 if (!timeo) {
1238 map_write(map, CMD(0x70), cmd_adr); 1260 map_write(map, CMD(0x70), cmd_adr);
1239 chip->state = FL_STATUS; 1261 chip->state = FL_STATUS;
@@ -1257,27 +1279,6 @@ static int inval_cache_and_wait_for_operation(
1257 timeo--; 1279 timeo--;
1258 } 1280 }
1259 mutex_lock(&chip->mutex); 1281 mutex_lock(&chip->mutex);
1260
1261 while (chip->state != chip_state) {
1262 /* Someone's suspended the operation: sleep */
1263 DECLARE_WAITQUEUE(wait, current);
1264 set_current_state(TASK_UNINTERRUPTIBLE);
1265 add_wait_queue(&chip->wq, &wait);
1266 mutex_unlock(&chip->mutex);
1267 schedule();
1268 remove_wait_queue(&chip->wq, &wait);
1269 mutex_lock(&chip->mutex);
1270 }
1271 if (chip->erase_suspended && chip_state == FL_ERASING) {
1272 /* Erase suspend occured while sleep: reset timeout */
1273 timeo = reset_timeo;
1274 chip->erase_suspended = 0;
1275 }
1276 if (chip->write_suspended && chip_state == FL_WRITING) {
1277 /* Write suspend occured while sleep: reset timeout */
1278 timeo = reset_timeo;
1279 chip->write_suspended = 0;
1280 }
1281 } 1282 }
1282 1283
1283 /* Done and happy. */ 1284 /* Done and happy. */
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index d72a5fb2d041..4e1be51cc122 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -1935,14 +1935,14 @@ static void jedec_reset(u32 base, struct map_info *map, struct cfi_private *cfi)
1935} 1935}
1936 1936
1937 1937
1938static int cfi_jedec_setup(struct cfi_private *p_cfi, int index) 1938static int cfi_jedec_setup(struct map_info *map, struct cfi_private *cfi, int index)
1939{ 1939{
1940 int i,num_erase_regions; 1940 int i,num_erase_regions;
1941 uint8_t uaddr; 1941 uint8_t uaddr;
1942 1942
1943 if (! (jedec_table[index].devtypes & p_cfi->device_type)) { 1943 if (!(jedec_table[index].devtypes & cfi->device_type)) {
1944 DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n", 1944 DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n",
1945 jedec_table[index].name, 4 * (1<<p_cfi->device_type)); 1945 jedec_table[index].name, 4 * (1<<cfi->device_type));
1946 return 0; 1946 return 0;
1947 } 1947 }
1948 1948
@@ -1950,27 +1950,28 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
1950 1950
1951 num_erase_regions = jedec_table[index].nr_regions; 1951 num_erase_regions = jedec_table[index].nr_regions;
1952 1952
1953 p_cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL); 1953 cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL);
1954 if (!p_cfi->cfiq) { 1954 if (!cfi->cfiq) {
1955 //xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name); 1955 //xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name);
1956 return 0; 1956 return 0;
1957 } 1957 }
1958 1958
1959 memset(p_cfi->cfiq,0,sizeof(struct cfi_ident)); 1959 memset(cfi->cfiq, 0, sizeof(struct cfi_ident));
1960 1960
1961 p_cfi->cfiq->P_ID = jedec_table[index].cmd_set; 1961 cfi->cfiq->P_ID = jedec_table[index].cmd_set;
1962 p_cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions; 1962 cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions;
1963 p_cfi->cfiq->DevSize = jedec_table[index].dev_size; 1963 cfi->cfiq->DevSize = jedec_table[index].dev_size;
1964 p_cfi->cfi_mode = CFI_MODE_JEDEC; 1964 cfi->cfi_mode = CFI_MODE_JEDEC;
1965 cfi->sector_erase_cmd = CMD(0x30);
1965 1966
1966 for (i=0; i<num_erase_regions; i++){ 1967 for (i=0; i<num_erase_regions; i++){
1967 p_cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i]; 1968 cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i];
1968 } 1969 }
1969 p_cfi->cmdset_priv = NULL; 1970 cfi->cmdset_priv = NULL;
1970 1971
1971 /* This may be redundant for some cases, but it doesn't hurt */ 1972 /* This may be redundant for some cases, but it doesn't hurt */
1972 p_cfi->mfr = jedec_table[index].mfr_id; 1973 cfi->mfr = jedec_table[index].mfr_id;
1973 p_cfi->id = jedec_table[index].dev_id; 1974 cfi->id = jedec_table[index].dev_id;
1974 1975
1975 uaddr = jedec_table[index].uaddr; 1976 uaddr = jedec_table[index].uaddr;
1976 1977
@@ -1978,8 +1979,8 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
1978 our brains explode when we see the datasheets talking about address 1979 our brains explode when we see the datasheets talking about address
1979 lines numbered from A-1 to A18. The CFI table has unlock addresses 1980 lines numbered from A-1 to A18. The CFI table has unlock addresses
1980 in device-words according to the mode the device is connected in */ 1981 in device-words according to the mode the device is connected in */
1981 p_cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / p_cfi->device_type; 1982 cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / cfi->device_type;
1982 p_cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / p_cfi->device_type; 1983 cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / cfi->device_type;
1983 1984
1984 return 1; /* ok */ 1985 return 1; /* ok */
1985} 1986}
@@ -2175,7 +2176,7 @@ static int jedec_probe_chip(struct map_info *map, __u32 base,
2175 "MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n", 2176 "MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n",
2176 __func__, cfi->mfr, cfi->id, 2177 __func__, cfi->mfr, cfi->id,
2177 cfi->addr_unlock1, cfi->addr_unlock2 ); 2178 cfi->addr_unlock1, cfi->addr_unlock2 );
2178 if (!cfi_jedec_setup(cfi, i)) 2179 if (!cfi_jedec_setup(map, cfi, i))
2179 return 0; 2180 return 0;
2180 goto ok_out; 2181 goto ok_out;
2181 } 2182 }
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 77d64ce19e9f..92de7e3a49a5 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -151,6 +151,7 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
151 printk(KERN_ERR MOD_NAME 151 printk(KERN_ERR MOD_NAME
152 " %s(): Unable to register resource %pR - kernel bug?\n", 152 " %s(): Unable to register resource %pR - kernel bug?\n",
153 __func__, &window->rsrc); 153 __func__, &window->rsrc);
154 return -EBUSY;
154 } 155 }
155 156
156 157
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index cb20c67995d8..e0a2373bf0e2 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -413,7 +413,6 @@ error3:
413error2: 413error2:
414 list_del(&new->list); 414 list_del(&new->list);
415error1: 415error1:
416 kfree(new);
417 return ret; 416 return ret;
418} 417}
419 418
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 15682ec8530e..28af71c61834 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -968,6 +968,6 @@ static void __exit omap_nand_exit(void)
968module_init(omap_nand_init); 968module_init(omap_nand_init);
969module_exit(omap_nand_exit); 969module_exit(omap_nand_exit);
970 970
971MODULE_ALIAS(DRIVER_NAME); 971MODULE_ALIAS("platform:" DRIVER_NAME);
972MODULE_LICENSE("GPL"); 972MODULE_LICENSE("GPL");
973MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards"); 973MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards");
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index e78914938c5c..ac08750748a3 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -131,7 +131,7 @@ static struct platform_driver generic_onenand_driver = {
131 .remove = __devexit_p(generic_onenand_remove), 131 .remove = __devexit_p(generic_onenand_remove),
132}; 132};
133 133
134MODULE_ALIAS(DRIVER_NAME); 134MODULE_ALIAS("platform:" DRIVER_NAME);
135 135
136static int __init generic_onenand_init(void) 136static int __init generic_onenand_init(void)
137{ 137{
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index ac31f461cc1c..c849cacf4b2f 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -860,7 +860,7 @@ static void __exit omap2_onenand_exit(void)
860module_init(omap2_onenand_init); 860module_init(omap2_onenand_init);
861module_exit(omap2_onenand_exit); 861module_exit(omap2_onenand_exit);
862 862
863MODULE_ALIAS(DRIVER_NAME); 863MODULE_ALIAS("platform:" DRIVER_NAME);
864MODULE_LICENSE("GPL"); 864MODULE_LICENSE("GPL");
865MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 865MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
866MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3"); 866MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 39214e512452..7ca0eded2561 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -425,11 +425,6 @@ static irqreturn_t ariadne_interrupt(int irq, void *data)
425 int csr0, boguscnt; 425 int csr0, boguscnt;
426 int handled = 0; 426 int handled = 0;
427 427
428 if (dev == NULL) {
429 printk(KERN_WARNING "ariadne_interrupt(): irq for unknown device.\n");
430 return IRQ_NONE;
431 }
432
433 lance->RAP = CSR0; /* PCnet-ISA Controller Status */ 428 lance->RAP = CSR0; /* PCnet-ISA Controller Status */
434 429
435 if (!(lance->RDP & INTR)) /* Check if any interrupt has been */ 430 if (!(lance->RDP & INTR)) /* Check if any interrupt has been */
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 653c62475cb6..8849699c66c4 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -22,7 +22,7 @@
22 * (you will need to reboot afterwards) */ 22 * (you will need to reboot afterwards) */
23/* #define BNX2X_STOP_ON_ERROR */ 23/* #define BNX2X_STOP_ON_ERROR */
24 24
25#define DRV_MODULE_VERSION "1.62.00-5" 25#define DRV_MODULE_VERSION "1.62.00-6"
26#define DRV_MODULE_RELDATE "2011/01/30" 26#define DRV_MODULE_RELDATE "2011/01/30"
27#define BNX2X_BC_VER 0x040200 27#define BNX2X_BC_VER 0x040200
28 28
@@ -1211,6 +1211,7 @@ struct bnx2x {
1211 /* DCBX Negotation results */ 1211 /* DCBX Negotation results */
1212 struct dcbx_features dcbx_local_feat; 1212 struct dcbx_features dcbx_local_feat;
1213 u32 dcbx_error; 1213 u32 dcbx_error;
1214 u32 pending_max;
1214}; 1215};
1215 1216
1216/** 1217/**
@@ -1613,19 +1614,23 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
1613#define BNX2X_BTR 4 1614#define BNX2X_BTR 4
1614#define MAX_SPQ_PENDING 8 1615#define MAX_SPQ_PENDING 8
1615 1616
1616 1617/* CMNG constants, as derived from system spec calculations */
1617/* CMNG constants 1618/* default MIN rate in case VNIC min rate is configured to zero - 100Mbps */
1618 derived from lab experiments, and not from system spec calculations !!! */ 1619#define DEF_MIN_RATE 100
1619#define DEF_MIN_RATE 100 1620/* resolution of the rate shaping timer - 400 usec */
1620/* resolution of the rate shaping timer - 100 usec */ 1621#define RS_PERIODIC_TIMEOUT_USEC 400
1621#define RS_PERIODIC_TIMEOUT_USEC 100
1622/* resolution of fairness algorithm in usecs -
1623 coefficient for calculating the actual t fair */
1624#define T_FAIR_COEF 10000000
1625/* number of bytes in single QM arbitration cycle - 1622/* number of bytes in single QM arbitration cycle -
1626 coefficient for calculating the fairness timer */ 1623 * coefficient for calculating the fairness timer */
1627#define QM_ARB_BYTES 40000 1624#define QM_ARB_BYTES 160000
1628#define FAIR_MEM 2 1625/* resolution of Min algorithm 1:100 */
1626#define MIN_RES 100
1627/* how many bytes above threshold for the minimal credit of Min algorithm*/
1628#define MIN_ABOVE_THRESH 32768
1629/* Fairness algorithm integration time coefficient -
1630 * for calculating the actual Tfair */
1631#define T_FAIR_COEF ((MIN_ABOVE_THRESH + QM_ARB_BYTES) * 8 * MIN_RES)
1632/* Memory of fairness algorithm . 2 cycles */
1633#define FAIR_MEM 2
1629 1634
1630 1635
1631#define ATTN_NIG_FOR_FUNC (1L << 8) 1636#define ATTN_NIG_FOR_FUNC (1L << 8)
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 710ce5d04c53..a71b32940533 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -259,10 +259,44 @@ static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
259#endif 259#endif
260} 260}
261 261
262/* Timestamp option length allowed for TPA aggregation:
263 *
264 * nop nop kind length echo val
265 */
266#define TPA_TSTAMP_OPT_LEN 12
267/**
268 * Calculate the approximate value of the MSS for this
269 * aggregation using the first packet of it.
270 *
271 * @param bp
272 * @param parsing_flags Parsing flags from the START CQE
273 * @param len_on_bd Total length of the first packet for the
274 * aggregation.
275 */
276static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
277 u16 len_on_bd)
278{
279 /* TPA arrgregation won't have an IP options and TCP options
280 * other than timestamp.
281 */
282 u16 hdrs_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct tcphdr);
283
284
285 /* Check if there was a TCP timestamp, if there is it's will
286 * always be 12 bytes length: nop nop kind length echo val.
287 *
288 * Otherwise FW would close the aggregation.
289 */
290 if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
291 hdrs_len += TPA_TSTAMP_OPT_LEN;
292
293 return len_on_bd - hdrs_len;
294}
295
262static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, 296static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
263 struct sk_buff *skb, 297 struct sk_buff *skb,
264 struct eth_fast_path_rx_cqe *fp_cqe, 298 struct eth_fast_path_rx_cqe *fp_cqe,
265 u16 cqe_idx) 299 u16 cqe_idx, u16 parsing_flags)
266{ 300{
267 struct sw_rx_page *rx_pg, old_rx_pg; 301 struct sw_rx_page *rx_pg, old_rx_pg;
268 u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd); 302 u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd);
@@ -275,8 +309,8 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
275 309
276 /* This is needed in order to enable forwarding support */ 310 /* This is needed in order to enable forwarding support */
277 if (frag_size) 311 if (frag_size)
278 skb_shinfo(skb)->gso_size = min((u32)SGE_PAGE_SIZE, 312 skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp, parsing_flags,
279 max(frag_size, (u32)len_on_bd)); 313 len_on_bd);
280 314
281#ifdef BNX2X_STOP_ON_ERROR 315#ifdef BNX2X_STOP_ON_ERROR
282 if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) { 316 if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
@@ -344,6 +378,8 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
344 if (likely(new_skb)) { 378 if (likely(new_skb)) {
345 /* fix ip xsum and give it to the stack */ 379 /* fix ip xsum and give it to the stack */
346 /* (no need to map the new skb) */ 380 /* (no need to map the new skb) */
381 u16 parsing_flags =
382 le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags);
347 383
348 prefetch(skb); 384 prefetch(skb);
349 prefetch(((char *)(skb)) + L1_CACHE_BYTES); 385 prefetch(((char *)(skb)) + L1_CACHE_BYTES);
@@ -373,9 +409,9 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
373 } 409 }
374 410
375 if (!bnx2x_fill_frag_skb(bp, fp, skb, 411 if (!bnx2x_fill_frag_skb(bp, fp, skb,
376 &cqe->fast_path_cqe, cqe_idx)) { 412 &cqe->fast_path_cqe, cqe_idx,
377 if ((le16_to_cpu(cqe->fast_path_cqe. 413 parsing_flags)) {
378 pars_flags.flags) & PARSING_FLAGS_VLAN)) 414 if (parsing_flags & PARSING_FLAGS_VLAN)
379 __vlan_hwaccel_put_tag(skb, 415 __vlan_hwaccel_put_tag(skb,
380 le16_to_cpu(cqe->fast_path_cqe. 416 le16_to_cpu(cqe->fast_path_cqe.
381 vlan_tag)); 417 vlan_tag));
@@ -703,19 +739,20 @@ u16 bnx2x_get_mf_speed(struct bnx2x *bp)
703{ 739{
704 u16 line_speed = bp->link_vars.line_speed; 740 u16 line_speed = bp->link_vars.line_speed;
705 if (IS_MF(bp)) { 741 if (IS_MF(bp)) {
706 u16 maxCfg = (bp->mf_config[BP_VN(bp)] & 742 u16 maxCfg = bnx2x_extract_max_cfg(bp,
707 FUNC_MF_CFG_MAX_BW_MASK) >> 743 bp->mf_config[BP_VN(bp)]);
708 FUNC_MF_CFG_MAX_BW_SHIFT; 744
709 /* Calculate the current MAX line speed limit for the DCC 745 /* Calculate the current MAX line speed limit for the MF
710 * capable devices 746 * devices
711 */ 747 */
712 if (IS_MF_SD(bp)) { 748 if (IS_MF_SI(bp))
749 line_speed = (line_speed * maxCfg) / 100;
750 else { /* SD mode */
713 u16 vn_max_rate = maxCfg * 100; 751 u16 vn_max_rate = maxCfg * 100;
714 752
715 if (vn_max_rate < line_speed) 753 if (vn_max_rate < line_speed)
716 line_speed = vn_max_rate; 754 line_speed = vn_max_rate;
717 } else /* IS_MF_SI(bp)) */ 755 }
718 line_speed = (line_speed * maxCfg) / 100;
719 } 756 }
720 757
721 return line_speed; 758 return line_speed;
@@ -959,6 +996,23 @@ void bnx2x_free_skbs(struct bnx2x *bp)
959 bnx2x_free_rx_skbs(bp); 996 bnx2x_free_rx_skbs(bp);
960} 997}
961 998
999void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
1000{
1001 /* load old values */
1002 u32 mf_cfg = bp->mf_config[BP_VN(bp)];
1003
1004 if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
1005 /* leave all but MAX value */
1006 mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
1007
1008 /* set new MAX value */
1009 mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
1010 & FUNC_MF_CFG_MAX_BW_MASK;
1011
1012 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
1013 }
1014}
1015
962static void bnx2x_free_msix_irqs(struct bnx2x *bp) 1016static void bnx2x_free_msix_irqs(struct bnx2x *bp)
963{ 1017{
964 int i, offset = 1; 1018 int i, offset = 1;
@@ -1427,6 +1481,11 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
1427 1481
1428 bnx2x_set_eth_mac(bp, 1); 1482 bnx2x_set_eth_mac(bp, 1);
1429 1483
1484 if (bp->pending_max) {
1485 bnx2x_update_max_mf_config(bp, bp->pending_max);
1486 bp->pending_max = 0;
1487 }
1488
1430 if (bp->port.pmf) 1489 if (bp->port.pmf)
1431 bnx2x_initial_phy_init(bp, load_mode); 1490 bnx2x_initial_phy_init(bp, load_mode);
1432 1491
diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
index 03eb4d68e6bb..85ea7f26b51f 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/bnx2x/bnx2x_cmn.h
@@ -341,6 +341,15 @@ void bnx2x_dcbx_init(struct bnx2x *bp);
341 */ 341 */
342int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state); 342int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state);
343 343
344/**
345 * Updates MAX part of MF configuration in HW
346 * (if required)
347 *
348 * @param bp
349 * @param value
350 */
351void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value);
352
344/* dev_close main block */ 353/* dev_close main block */
345int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode); 354int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode);
346 355
@@ -1044,4 +1053,24 @@ static inline void storm_memset_cmng(struct bnx2x *bp,
1044void bnx2x_acquire_phy_lock(struct bnx2x *bp); 1053void bnx2x_acquire_phy_lock(struct bnx2x *bp);
1045void bnx2x_release_phy_lock(struct bnx2x *bp); 1054void bnx2x_release_phy_lock(struct bnx2x *bp);
1046 1055
1056/**
1057 * Extracts MAX BW part from MF configuration.
1058 *
1059 * @param bp
1060 * @param mf_cfg
1061 *
1062 * @return u16
1063 */
1064static inline u16 bnx2x_extract_max_cfg(struct bnx2x *bp, u32 mf_cfg)
1065{
1066 u16 max_cfg = (mf_cfg & FUNC_MF_CFG_MAX_BW_MASK) >>
1067 FUNC_MF_CFG_MAX_BW_SHIFT;
1068 if (!max_cfg) {
1069 BNX2X_ERR("Illegal configuration detected for Max BW - "
1070 "using 100 instead\n");
1071 max_cfg = 100;
1072 }
1073 return max_cfg;
1074}
1075
1047#endif /* BNX2X_CMN_H */ 1076#endif /* BNX2X_CMN_H */
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index 5b44a8b48509..7e92f9d0dcfd 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -238,7 +238,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
238 speed |= (cmd->speed_hi << 16); 238 speed |= (cmd->speed_hi << 16);
239 239
240 if (IS_MF_SI(bp)) { 240 if (IS_MF_SI(bp)) {
241 u32 param = 0; 241 u32 part;
242 u32 line_speed = bp->link_vars.line_speed; 242 u32 line_speed = bp->link_vars.line_speed;
243 243
244 /* use 10G if no link detected */ 244 /* use 10G if no link detected */
@@ -251,23 +251,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
251 REQ_BC_VER_4_SET_MF_BW); 251 REQ_BC_VER_4_SET_MF_BW);
252 return -EINVAL; 252 return -EINVAL;
253 } 253 }
254 if (line_speed < speed) { 254
255 BNX2X_DEV_INFO("New speed should be less or equal " 255 part = (speed * 100) / line_speed;
256 "to actual line speed\n"); 256
257 if (line_speed < speed || !part) {
258 BNX2X_DEV_INFO("Speed setting should be in a range "
259 "from 1%% to 100%% "
260 "of actual line speed\n");
257 return -EINVAL; 261 return -EINVAL;
258 } 262 }
259 /* load old values */
260 param = bp->mf_config[BP_VN(bp)];
261
262 /* leave only MIN value */
263 param &= FUNC_MF_CFG_MIN_BW_MASK;
264 263
265 /* set new MAX value */ 264 if (bp->state != BNX2X_STATE_OPEN)
266 param |= (((speed * 100) / line_speed) 265 /* store value for following "load" */
267 << FUNC_MF_CFG_MAX_BW_SHIFT) 266 bp->pending_max = part;
268 & FUNC_MF_CFG_MAX_BW_MASK; 267 else
268 bnx2x_update_max_mf_config(bp, part);
269 269
270 bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, param);
271 return 0; 270 return 0;
272 } 271 }
273 272
@@ -1781,9 +1780,7 @@ static int bnx2x_test_nvram(struct bnx2x *bp)
1781 { 0x100, 0x350 }, /* manuf_info */ 1780 { 0x100, 0x350 }, /* manuf_info */
1782 { 0x450, 0xf0 }, /* feature_info */ 1781 { 0x450, 0xf0 }, /* feature_info */
1783 { 0x640, 0x64 }, /* upgrade_key_info */ 1782 { 0x640, 0x64 }, /* upgrade_key_info */
1784 { 0x6a4, 0x64 },
1785 { 0x708, 0x70 }, /* manuf_key_info */ 1783 { 0x708, 0x70 }, /* manuf_key_info */
1786 { 0x778, 0x70 },
1787 { 0, 0 } 1784 { 0, 0 }
1788 }; 1785 };
1789 __be32 buf[0x350 / 4]; 1786 __be32 buf[0x350 / 4];
@@ -1933,11 +1930,11 @@ static void bnx2x_self_test(struct net_device *dev,
1933 buf[4] = 1; 1930 buf[4] = 1;
1934 etest->flags |= ETH_TEST_FL_FAILED; 1931 etest->flags |= ETH_TEST_FL_FAILED;
1935 } 1932 }
1936 if (bp->port.pmf) 1933
1937 if (bnx2x_link_test(bp, is_serdes) != 0) { 1934 if (bnx2x_link_test(bp, is_serdes) != 0) {
1938 buf[5] = 1; 1935 buf[5] = 1;
1939 etest->flags |= ETH_TEST_FL_FAILED; 1936 etest->flags |= ETH_TEST_FL_FAILED;
1940 } 1937 }
1941 1938
1942#ifdef BNX2X_EXTRA_DEBUG 1939#ifdef BNX2X_EXTRA_DEBUG
1943 bnx2x_panic_dump(bp); 1940 bnx2x_panic_dump(bp);
diff --git a/drivers/net/bnx2x/bnx2x_init.h b/drivers/net/bnx2x/bnx2x_init.h
index 5a268e9a0895..fa6dbe3f2058 100644
--- a/drivers/net/bnx2x/bnx2x_init.h
+++ b/drivers/net/bnx2x/bnx2x_init.h
@@ -241,7 +241,7 @@ static const struct {
241 /* Block IGU, MISC, PXP and PXP2 parity errors as long as we don't 241 /* Block IGU, MISC, PXP and PXP2 parity errors as long as we don't
242 * want to handle "system kill" flow at the moment. 242 * want to handle "system kill" flow at the moment.
243 */ 243 */
244 BLOCK_PRTY_INFO(PXP, 0x3ffffff, 0x3ffffff, 0x3ffffff, 0x3ffffff), 244 BLOCK_PRTY_INFO(PXP, 0x7ffffff, 0x3ffffff, 0x3ffffff, 0x7ffffff),
245 BLOCK_PRTY_INFO_0(PXP2, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), 245 BLOCK_PRTY_INFO_0(PXP2, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
246 BLOCK_PRTY_INFO_1(PXP2, 0x7ff, 0x7f, 0x7f, 0x7ff), 246 BLOCK_PRTY_INFO_1(PXP2, 0x7ff, 0x7f, 0x7f, 0x7ff),
247 BLOCK_PRTY_INFO(HC, 0x7, 0x7, 0x7, 0), 247 BLOCK_PRTY_INFO(HC, 0x7, 0x7, 0x7, 0),
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index d584d32c747d..aa032339e321 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -1974,13 +1974,22 @@ static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
1974 vn_max_rate = 0; 1974 vn_max_rate = 0;
1975 1975
1976 } else { 1976 } else {
1977 u32 maxCfg = bnx2x_extract_max_cfg(bp, vn_cfg);
1978
1977 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >> 1979 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
1978 FUNC_MF_CFG_MIN_BW_SHIFT) * 100; 1980 FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
1979 /* If min rate is zero - set it to 1 */ 1981 /* If fairness is enabled (not all min rates are zeroes) and
1982 if current min rate is zero - set it to 1.
1983 This is a requirement of the algorithm. */
1980 if (bp->vn_weight_sum && (vn_min_rate == 0)) 1984 if (bp->vn_weight_sum && (vn_min_rate == 0))
1981 vn_min_rate = DEF_MIN_RATE; 1985 vn_min_rate = DEF_MIN_RATE;
1982 vn_max_rate = ((vn_cfg & FUNC_MF_CFG_MAX_BW_MASK) >> 1986
1983 FUNC_MF_CFG_MAX_BW_SHIFT) * 100; 1987 if (IS_MF_SI(bp))
1988 /* maxCfg in percents of linkspeed */
1989 vn_max_rate = (bp->link_vars.line_speed * maxCfg) / 100;
1990 else
1991 /* maxCfg is absolute in 100Mb units */
1992 vn_max_rate = maxCfg * 100;
1984 } 1993 }
1985 1994
1986 DP(NETIF_MSG_IFUP, 1995 DP(NETIF_MSG_IFUP,
@@ -2006,7 +2015,8 @@ static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
2006 m_fair_vn.vn_credit_delta = 2015 m_fair_vn.vn_credit_delta =
2007 max_t(u32, (vn_min_rate * (T_FAIR_COEF / 2016 max_t(u32, (vn_min_rate * (T_FAIR_COEF /
2008 (8 * bp->vn_weight_sum))), 2017 (8 * bp->vn_weight_sum))),
2009 (bp->cmng.fair_vars.fair_threshold * 2)); 2018 (bp->cmng.fair_vars.fair_threshold +
2019 MIN_ABOVE_THRESH));
2010 DP(NETIF_MSG_IFUP, "m_fair_vn.vn_credit_delta %d\n", 2020 DP(NETIF_MSG_IFUP, "m_fair_vn.vn_credit_delta %d\n",
2011 m_fair_vn.vn_credit_delta); 2021 m_fair_vn.vn_credit_delta);
2012 } 2022 }
@@ -2082,8 +2092,9 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
2082 bnx2x_calc_vn_weight_sum(bp); 2092 bnx2x_calc_vn_weight_sum(bp);
2083 2093
2084 /* calculate and set min-max rate for each vn */ 2094 /* calculate and set min-max rate for each vn */
2085 for (vn = VN_0; vn < E1HVN_MAX; vn++) 2095 if (bp->port.pmf)
2086 bnx2x_init_vn_minmax(bp, vn); 2096 for (vn = VN_0; vn < E1HVN_MAX; vn++)
2097 bnx2x_init_vn_minmax(bp, vn);
2087 2098
2088 /* always enable rate shaping and fairness */ 2099 /* always enable rate shaping and fairness */
2089 bp->cmng.flags.cmng_enables |= 2100 bp->cmng.flags.cmng_enables |=
@@ -2152,13 +2163,6 @@ static void bnx2x_link_attn(struct bnx2x *bp)
2152 bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP); 2163 bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
2153 } 2164 }
2154 2165
2155 /* indicate link status only if link status actually changed */
2156 if (prev_link_status != bp->link_vars.link_status)
2157 bnx2x_link_report(bp);
2158
2159 if (IS_MF(bp))
2160 bnx2x_link_sync_notify(bp);
2161
2162 if (bp->link_vars.link_up && bp->link_vars.line_speed) { 2166 if (bp->link_vars.link_up && bp->link_vars.line_speed) {
2163 int cmng_fns = bnx2x_get_cmng_fns_mode(bp); 2167 int cmng_fns = bnx2x_get_cmng_fns_mode(bp);
2164 2168
@@ -2170,6 +2174,13 @@ static void bnx2x_link_attn(struct bnx2x *bp)
2170 DP(NETIF_MSG_IFUP, 2174 DP(NETIF_MSG_IFUP,
2171 "single function mode without fairness\n"); 2175 "single function mode without fairness\n");
2172 } 2176 }
2177
2178 if (IS_MF(bp))
2179 bnx2x_link_sync_notify(bp);
2180
2181 /* indicate link status only if link status actually changed */
2182 if (prev_link_status != bp->link_vars.link_status)
2183 bnx2x_link_report(bp);
2173} 2184}
2174 2185
2175void bnx2x__link_status_update(struct bnx2x *bp) 2186void bnx2x__link_status_update(struct bnx2x *bp)
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c
index bda60d590fa8..3445ded6674f 100644
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -1239,14 +1239,14 @@ void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
1239 if (unlikely(bp->panic)) 1239 if (unlikely(bp->panic))
1240 return; 1240 return;
1241 1241
1242 bnx2x_stats_stm[bp->stats_state][event].action(bp);
1243
1242 /* Protect a state change flow */ 1244 /* Protect a state change flow */
1243 spin_lock_bh(&bp->stats_lock); 1245 spin_lock_bh(&bp->stats_lock);
1244 state = bp->stats_state; 1246 state = bp->stats_state;
1245 bp->stats_state = bnx2x_stats_stm[state][event].next_state; 1247 bp->stats_state = bnx2x_stats_stm[state][event].next_state;
1246 spin_unlock_bh(&bp->stats_lock); 1248 spin_unlock_bh(&bp->stats_lock);
1247 1249
1248 bnx2x_stats_stm[state][event].action(bp);
1249
1250 if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp)) 1250 if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp))
1251 DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n", 1251 DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
1252 state, event, bp->stats_state); 1252 state, event, bp->stats_state);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 1024ae158227..a5d5d0b5b155 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -281,23 +281,23 @@ static inline int __check_agg_selection_timer(struct port *port)
281} 281}
282 282
283/** 283/**
284 * __get_rx_machine_lock - lock the port's RX machine 284 * __get_state_machine_lock - lock the port's state machines
285 * @port: the port we're looking at 285 * @port: the port we're looking at
286 * 286 *
287 */ 287 */
288static inline void __get_rx_machine_lock(struct port *port) 288static inline void __get_state_machine_lock(struct port *port)
289{ 289{
290 spin_lock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 290 spin_lock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
291} 291}
292 292
293/** 293/**
294 * __release_rx_machine_lock - unlock the port's RX machine 294 * __release_state_machine_lock - unlock the port's state machines
295 * @port: the port we're looking at 295 * @port: the port we're looking at
296 * 296 *
297 */ 297 */
298static inline void __release_rx_machine_lock(struct port *port) 298static inline void __release_state_machine_lock(struct port *port)
299{ 299{
300 spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 300 spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
301} 301}
302 302
303/** 303/**
@@ -388,14 +388,14 @@ static u8 __get_duplex(struct port *port)
388} 388}
389 389
390/** 390/**
391 * __initialize_port_locks - initialize a port's RX machine spinlock 391 * __initialize_port_locks - initialize a port's STATE machine spinlock
392 * @port: the port we're looking at 392 * @port: the port we're looking at
393 * 393 *
394 */ 394 */
395static inline void __initialize_port_locks(struct port *port) 395static inline void __initialize_port_locks(struct port *port)
396{ 396{
397 // make sure it isn't called twice 397 // make sure it isn't called twice
398 spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); 398 spin_lock_init(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
399} 399}
400 400
401//conversions 401//conversions
@@ -1025,9 +1025,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1025{ 1025{
1026 rx_states_t last_state; 1026 rx_states_t last_state;
1027 1027
1028 // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback)
1029 __get_rx_machine_lock(port);
1030
1031 // keep current State Machine state to compare later if it was changed 1028 // keep current State Machine state to compare later if it was changed
1032 last_state = port->sm_rx_state; 1029 last_state = port->sm_rx_state;
1033 1030
@@ -1133,7 +1130,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1133 pr_err("%s: An illegal loopback occurred on adapter (%s).\n" 1130 pr_err("%s: An illegal loopback occurred on adapter (%s).\n"
1134 "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n", 1131 "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n",
1135 port->slave->dev->master->name, port->slave->dev->name); 1132 port->slave->dev->master->name, port->slave->dev->name);
1136 __release_rx_machine_lock(port);
1137 return; 1133 return;
1138 } 1134 }
1139 __update_selected(lacpdu, port); 1135 __update_selected(lacpdu, port);
@@ -1153,7 +1149,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
1153 break; 1149 break;
1154 } 1150 }
1155 } 1151 }
1156 __release_rx_machine_lock(port);
1157} 1152}
1158 1153
1159/** 1154/**
@@ -2155,6 +2150,12 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2155 goto re_arm; 2150 goto re_arm;
2156 } 2151 }
2157 2152
2153 /* Lock around state machines to protect data accessed
2154 * by all (e.g., port->sm_vars). ad_rx_machine may run
2155 * concurrently due to incoming LACPDU.
2156 */
2157 __get_state_machine_lock(port);
2158
2158 ad_rx_machine(NULL, port); 2159 ad_rx_machine(NULL, port);
2159 ad_periodic_machine(port); 2160 ad_periodic_machine(port);
2160 ad_port_selection_logic(port); 2161 ad_port_selection_logic(port);
@@ -2164,6 +2165,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
2164 // turn off the BEGIN bit, since we already handled it 2165 // turn off the BEGIN bit, since we already handled it
2165 if (port->sm_vars & AD_PORT_BEGIN) 2166 if (port->sm_vars & AD_PORT_BEGIN)
2166 port->sm_vars &= ~AD_PORT_BEGIN; 2167 port->sm_vars &= ~AD_PORT_BEGIN;
2168
2169 __release_state_machine_lock(port);
2167 } 2170 }
2168 2171
2169re_arm: 2172re_arm:
@@ -2200,7 +2203,10 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
2200 case AD_TYPE_LACPDU: 2203 case AD_TYPE_LACPDU:
2201 pr_debug("Received LACPDU on port %d\n", 2204 pr_debug("Received LACPDU on port %d\n",
2202 port->actor_port_number); 2205 port->actor_port_number);
2206 /* Protect against concurrent state machines */
2207 __get_state_machine_lock(port);
2203 ad_rx_machine(lacpdu, port); 2208 ad_rx_machine(lacpdu, port);
2209 __release_state_machine_lock(port);
2204 break; 2210 break;
2205 2211
2206 case AD_TYPE_MARKER: 2212 case AD_TYPE_MARKER:
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index 2c46a154f2c6..b28baff70864 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -264,7 +264,8 @@ struct ad_bond_info {
264struct ad_slave_info { 264struct ad_slave_info {
265 struct aggregator aggregator; // 802.3ad aggregator structure 265 struct aggregator aggregator; // 802.3ad aggregator structure
266 struct port port; // 802.3ad port structure 266 struct port port; // 802.3ad port structure
267 spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt 267 spinlock_t state_machine_lock; /* mutex state machines vs.
268 incoming LACPDU */
268 u16 id; 269 u16 id;
269}; 270};
270 271
diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index 5157e15e96eb..aeea9f9ff6e8 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c
@@ -633,6 +633,7 @@ static const struct net_device_ops softing_netdev_ops = {
633}; 633};
634 634
635static const struct can_bittiming_const softing_btr_const = { 635static const struct can_bittiming_const softing_btr_const = {
636 .name = "softing",
636 .tseg1_min = 1, 637 .tseg1_min = 1,
637 .tseg1_max = 16, 638 .tseg1_max = 16,
638 .tseg2_min = 1, 639 .tseg2_min = 1,
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index 7ff170cbc7dc..302be4aa69d6 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -2760,6 +2760,8 @@ static u32 cnic_service_bnx2_queues(struct cnic_dev *dev)
2760 u32 status_idx = (u16) *cp->kcq1.status_idx_ptr; 2760 u32 status_idx = (u16) *cp->kcq1.status_idx_ptr;
2761 int kcqe_cnt; 2761 int kcqe_cnt;
2762 2762
2763 /* status block index must be read before reading other fields */
2764 rmb();
2763 cp->kwq_con_idx = *cp->kwq_con_idx_ptr; 2765 cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
2764 2766
2765 while ((kcqe_cnt = cnic_get_kcqes(dev, &cp->kcq1))) { 2767 while ((kcqe_cnt = cnic_get_kcqes(dev, &cp->kcq1))) {
@@ -2770,6 +2772,8 @@ static u32 cnic_service_bnx2_queues(struct cnic_dev *dev)
2770 barrier(); 2772 barrier();
2771 if (status_idx != *cp->kcq1.status_idx_ptr) { 2773 if (status_idx != *cp->kcq1.status_idx_ptr) {
2772 status_idx = (u16) *cp->kcq1.status_idx_ptr; 2774 status_idx = (u16) *cp->kcq1.status_idx_ptr;
2775 /* status block index must be read first */
2776 rmb();
2773 cp->kwq_con_idx = *cp->kwq_con_idx_ptr; 2777 cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
2774 } else 2778 } else
2775 break; 2779 break;
@@ -2888,6 +2892,8 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
2888 u32 last_status = *info->status_idx_ptr; 2892 u32 last_status = *info->status_idx_ptr;
2889 int kcqe_cnt; 2893 int kcqe_cnt;
2890 2894
2895 /* status block index must be read before reading the KCQ */
2896 rmb();
2891 while ((kcqe_cnt = cnic_get_kcqes(dev, info))) { 2897 while ((kcqe_cnt = cnic_get_kcqes(dev, info))) {
2892 2898
2893 service_kcqes(dev, kcqe_cnt); 2899 service_kcqes(dev, kcqe_cnt);
@@ -2898,6 +2904,8 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
2898 break; 2904 break;
2899 2905
2900 last_status = *info->status_idx_ptr; 2906 last_status = *info->status_idx_ptr;
2907 /* status block index must be read before reading the KCQ */
2908 rmb();
2901 } 2909 }
2902 return last_status; 2910 return last_status;
2903} 2911}
@@ -2906,26 +2914,35 @@ static void cnic_service_bnx2x_bh(unsigned long data)
2906{ 2914{
2907 struct cnic_dev *dev = (struct cnic_dev *) data; 2915 struct cnic_dev *dev = (struct cnic_dev *) data;
2908 struct cnic_local *cp = dev->cnic_priv; 2916 struct cnic_local *cp = dev->cnic_priv;
2909 u32 status_idx; 2917 u32 status_idx, new_status_idx;
2910 2918
2911 if (unlikely(!test_bit(CNIC_F_CNIC_UP, &dev->flags))) 2919 if (unlikely(!test_bit(CNIC_F_CNIC_UP, &dev->flags)))
2912 return; 2920 return;
2913 2921
2914 status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq1); 2922 while (1) {
2923 status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq1);
2915 2924
2916 CNIC_WR16(dev, cp->kcq1.io_addr, cp->kcq1.sw_prod_idx + MAX_KCQ_IDX); 2925 CNIC_WR16(dev, cp->kcq1.io_addr,
2926 cp->kcq1.sw_prod_idx + MAX_KCQ_IDX);
2917 2927
2918 if (BNX2X_CHIP_IS_E2(cp->chip_id)) { 2928 if (!BNX2X_CHIP_IS_E2(cp->chip_id)) {
2919 status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq2); 2929 cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, USTORM_ID,
2930 status_idx, IGU_INT_ENABLE, 1);
2931 break;
2932 }
2933
2934 new_status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq2);
2935
2936 if (new_status_idx != status_idx)
2937 continue;
2920 2938
2921 CNIC_WR16(dev, cp->kcq2.io_addr, cp->kcq2.sw_prod_idx + 2939 CNIC_WR16(dev, cp->kcq2.io_addr, cp->kcq2.sw_prod_idx +
2922 MAX_KCQ_IDX); 2940 MAX_KCQ_IDX);
2923 2941
2924 cnic_ack_igu_sb(dev, cp->bnx2x_igu_sb_id, IGU_SEG_ACCESS_DEF, 2942 cnic_ack_igu_sb(dev, cp->bnx2x_igu_sb_id, IGU_SEG_ACCESS_DEF,
2925 status_idx, IGU_INT_ENABLE, 1); 2943 status_idx, IGU_INT_ENABLE, 1);
2926 } else { 2944
2927 cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, USTORM_ID, 2945 break;
2928 status_idx, IGU_INT_ENABLE, 1);
2929 } 2946 }
2930} 2947}
2931 2948
diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 2a628d17d178..7018bfe408a4 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -1008,7 +1008,7 @@ static void emac_rx_handler(void *token, int len, int status)
1008 int ret; 1008 int ret;
1009 1009
1010 /* free and bail if we are shutting down */ 1010 /* free and bail if we are shutting down */
1011 if (unlikely(!netif_running(ndev))) { 1011 if (unlikely(!netif_running(ndev) || !netif_carrier_ok(ndev))) {
1012 dev_kfree_skb_any(skb); 1012 dev_kfree_skb_any(skb);
1013 return; 1013 return;
1014 } 1014 }
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 2d4c4fc1d900..461dd6f905f7 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -802,10 +802,7 @@ dm9000_init_dm9000(struct net_device *dev)
802 /* Checksum mode */ 802 /* Checksum mode */
803 dm9000_set_rx_csum_unlocked(dev, db->rx_csum); 803 dm9000_set_rx_csum_unlocked(dev, db->rx_csum);
804 804
805 /* GPIO0 on pre-activate PHY */
806 iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
807 iow(db, DM9000_GPCR, GPCR_GEP_CNTL); /* Let GPIO0 output */ 805 iow(db, DM9000_GPCR, GPCR_GEP_CNTL); /* Let GPIO0 output */
808 iow(db, DM9000_GPR, 0); /* Enable PHY */
809 806
810 ncr = (db->flags & DM9000_PLATF_EXT_PHY) ? NCR_EXT_PHY : 0; 807 ncr = (db->flags & DM9000_PLATF_EXT_PHY) ? NCR_EXT_PHY : 0;
811 808
@@ -852,8 +849,8 @@ static void dm9000_timeout(struct net_device *dev)
852 unsigned long flags; 849 unsigned long flags;
853 850
854 /* Save previous register address */ 851 /* Save previous register address */
855 reg_save = readb(db->io_addr);
856 spin_lock_irqsave(&db->lock, flags); 852 spin_lock_irqsave(&db->lock, flags);
853 reg_save = readb(db->io_addr);
857 854
858 netif_stop_queue(dev); 855 netif_stop_queue(dev);
859 dm9000_reset(db); 856 dm9000_reset(db);
@@ -1194,6 +1191,10 @@ dm9000_open(struct net_device *dev)
1194 if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev)) 1191 if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev))
1195 return -EAGAIN; 1192 return -EAGAIN;
1196 1193
1194 /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
1195 iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
1196 mdelay(1); /* delay needs by DM9000B */
1197
1197 /* Initialize DM9000 board */ 1198 /* Initialize DM9000 board */
1198 dm9000_reset(db); 1199 dm9000_reset(db);
1199 dm9000_init_dm9000(dev); 1200 dm9000_init_dm9000(dev);
diff --git a/drivers/net/dnet.c b/drivers/net/dnet.c
index 9d8a20b72fa9..8318ea06cb6d 100644
--- a/drivers/net/dnet.c
+++ b/drivers/net/dnet.c
@@ -337,8 +337,6 @@ static int dnet_mii_init(struct dnet *bp)
337 for (i = 0; i < PHY_MAX_ADDR; i++) 337 for (i = 0; i < PHY_MAX_ADDR; i++)
338 bp->mii_bus->irq[i] = PHY_POLL; 338 bp->mii_bus->irq[i] = PHY_POLL;
339 339
340 platform_set_drvdata(bp->dev, bp->mii_bus);
341
342 if (mdiobus_register(bp->mii_bus)) { 340 if (mdiobus_register(bp->mii_bus)) {
343 err = -ENXIO; 341 err = -ENXIO;
344 goto err_out_free_mdio_irq; 342 goto err_out_free_mdio_irq;
@@ -863,6 +861,7 @@ static int __devinit dnet_probe(struct platform_device *pdev)
863 bp = netdev_priv(dev); 861 bp = netdev_priv(dev);
864 bp->dev = dev; 862 bp->dev = dev;
865 863
864 platform_set_drvdata(pdev, dev);
866 SET_NETDEV_DEV(dev, &pdev->dev); 865 SET_NETDEV_DEV(dev, &pdev->dev);
867 866
868 spin_lock_init(&bp->lock); 867 spin_lock_init(&bp->lock);
diff --git a/drivers/net/e1000/e1000_osdep.h b/drivers/net/e1000/e1000_osdep.h
index 55c1711f1688..33e7c45a4fe4 100644
--- a/drivers/net/e1000/e1000_osdep.h
+++ b/drivers/net/e1000/e1000_osdep.h
@@ -42,7 +42,8 @@
42#define GBE_CONFIG_RAM_BASE \ 42#define GBE_CONFIG_RAM_BASE \
43 ((unsigned int)(CONFIG_RAM_BASE + GBE_CONFIG_OFFSET)) 43 ((unsigned int)(CONFIG_RAM_BASE + GBE_CONFIG_OFFSET))
44 44
45#define GBE_CONFIG_BASE_VIRT phys_to_virt(GBE_CONFIG_RAM_BASE) 45#define GBE_CONFIG_BASE_VIRT \
46 ((void __iomem *)phys_to_virt(GBE_CONFIG_RAM_BASE))
46 47
47#define GBE_CONFIG_FLASH_WRITE(base, offset, count, data) \ 48#define GBE_CONFIG_FLASH_WRITE(base, offset, count, data) \
48 (iowrite16_rep(base + offset, data, count)) 49 (iowrite16_rep(base + offset, data, count))
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 3fa110ddb041..2e5022849f18 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -5967,7 +5967,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
5967 /* APME bit in EEPROM is mapped to WUC.APME */ 5967 /* APME bit in EEPROM is mapped to WUC.APME */
5968 eeprom_data = er32(WUC); 5968 eeprom_data = er32(WUC);
5969 eeprom_apme_mask = E1000_WUC_APME; 5969 eeprom_apme_mask = E1000_WUC_APME;
5970 if (eeprom_data & E1000_WUC_PHY_WAKE) 5970 if ((hw->mac.type > e1000_ich10lan) &&
5971 (eeprom_data & E1000_WUC_PHY_WAKE))
5971 adapter->flags2 |= FLAG2_HAS_PHY_WAKEUP; 5972 adapter->flags2 |= FLAG2_HAS_PHY_WAKEUP;
5972 } else if (adapter->flags & FLAG_APME_IN_CTRL3) { 5973 } else if (adapter->flags & FLAG_APME_IN_CTRL3) {
5973 if (adapter->flags & FLAG_APME_CHECK_PORT_B && 5974 if (adapter->flags & FLAG_APME_CHECK_PORT_B &&
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 2a71373719ae..cd0282d5d40f 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -74,7 +74,8 @@ static struct platform_device_id fec_devtype[] = {
74 }, { 74 }, {
75 .name = "imx28-fec", 75 .name = "imx28-fec",
76 .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME, 76 .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME,
77 } 77 },
78 { }
78}; 79};
79 80
80static unsigned char macaddr[ETH_ALEN]; 81static unsigned char macaddr[ETH_ALEN];
diff --git a/drivers/net/igbvf/vf.c b/drivers/net/igbvf/vf.c
index 74486a8b009a..af3822f9ea9a 100644
--- a/drivers/net/igbvf/vf.c
+++ b/drivers/net/igbvf/vf.c
@@ -220,7 +220,7 @@ static u32 e1000_hash_mc_addr_vf(struct e1000_hw *hw, u8 *mc_addr)
220 * The parameter rar_count will usually be hw->mac.rar_entry_count 220 * The parameter rar_count will usually be hw->mac.rar_entry_count
221 * unless there are workarounds that change this. 221 * unless there are workarounds that change this.
222 **/ 222 **/
223void e1000_update_mc_addr_list_vf(struct e1000_hw *hw, 223static void e1000_update_mc_addr_list_vf(struct e1000_hw *hw,
224 u8 *mc_addr_list, u32 mc_addr_count, 224 u8 *mc_addr_list, u32 mc_addr_count,
225 u32 rar_used_count, u32 rar_count) 225 u32 rar_used_count, u32 rar_count)
226{ 226{
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index f69e73e2191e..79ccb54ab00c 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -260,7 +260,7 @@ static int macb_mii_init(struct macb *bp)
260 for (i = 0; i < PHY_MAX_ADDR; i++) 260 for (i = 0; i < PHY_MAX_ADDR; i++)
261 bp->mii_bus->irq[i] = PHY_POLL; 261 bp->mii_bus->irq[i] = PHY_POLL;
262 262
263 platform_set_drvdata(bp->dev, bp->mii_bus); 263 dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
264 264
265 if (mdiobus_register(bp->mii_bus)) 265 if (mdiobus_register(bp->mii_bus))
266 goto err_out_free_mdio_irq; 266 goto err_out_free_mdio_irq;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 5933621ac3ff..fc27a9926d9e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -528,8 +528,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
528 vnet_hdr_len = q->vnet_hdr_sz; 528 vnet_hdr_len = q->vnet_hdr_sz;
529 529
530 err = -EINVAL; 530 err = -EINVAL;
531 if ((len -= vnet_hdr_len) < 0) 531 if (len < vnet_hdr_len)
532 goto err; 532 goto err;
533 len -= vnet_hdr_len;
533 534
534 err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0, 535 err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0,
535 sizeof(vnet_hdr)); 536 sizeof(vnet_hdr));
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 9226cda4d054..530ab5a10bd3 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -691,6 +691,7 @@ static struct pcmcia_device_id fmvj18x_ids[] = {
691 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0105, 0x0e0a), 691 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0105, 0x0e0a),
692 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0e01), 692 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0e01),
693 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0a05), 693 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0a05),
694 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0b05),
694 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x1101), 695 PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x1101),
695 PCMCIA_DEVICE_NULL, 696 PCMCIA_DEVICE_NULL,
696}; 697};
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 27e6f6d43cac..e3ebd90ae651 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -49,8 +49,8 @@
49#include <asm/processor.h> 49#include <asm/processor.h>
50 50
51#define DRV_NAME "r6040" 51#define DRV_NAME "r6040"
52#define DRV_VERSION "0.26" 52#define DRV_VERSION "0.27"
53#define DRV_RELDATE "30May2010" 53#define DRV_RELDATE "23Feb2011"
54 54
55/* PHY CHIP Address */ 55/* PHY CHIP Address */
56#define PHY1_ADDR 1 /* For MAC1 */ 56#define PHY1_ADDR 1 /* For MAC1 */
@@ -69,6 +69,8 @@
69 69
70/* MAC registers */ 70/* MAC registers */
71#define MCR0 0x00 /* Control register 0 */ 71#define MCR0 0x00 /* Control register 0 */
72#define MCR0_PROMISC 0x0020 /* Promiscuous mode */
73#define MCR0_HASH_EN 0x0100 /* Enable multicast hash table function */
72#define MCR1 0x04 /* Control register 1 */ 74#define MCR1 0x04 /* Control register 1 */
73#define MAC_RST 0x0001 /* Reset the MAC */ 75#define MAC_RST 0x0001 /* Reset the MAC */
74#define MBCR 0x08 /* Bus control */ 76#define MBCR 0x08 /* Bus control */
@@ -851,77 +853,92 @@ static void r6040_multicast_list(struct net_device *dev)
851{ 853{
852 struct r6040_private *lp = netdev_priv(dev); 854 struct r6040_private *lp = netdev_priv(dev);
853 void __iomem *ioaddr = lp->base; 855 void __iomem *ioaddr = lp->base;
854 u16 *adrp;
855 u16 reg;
856 unsigned long flags; 856 unsigned long flags;
857 struct netdev_hw_addr *ha; 857 struct netdev_hw_addr *ha;
858 int i; 858 int i;
859 u16 *adrp;
860 u16 hash_table[4] = { 0 };
861
862 spin_lock_irqsave(&lp->lock, flags);
859 863
860 /* MAC Address */ 864 /* Keep our MAC Address */
861 adrp = (u16 *)dev->dev_addr; 865 adrp = (u16 *)dev->dev_addr;
862 iowrite16(adrp[0], ioaddr + MID_0L); 866 iowrite16(adrp[0], ioaddr + MID_0L);
863 iowrite16(adrp[1], ioaddr + MID_0M); 867 iowrite16(adrp[1], ioaddr + MID_0M);
864 iowrite16(adrp[2], ioaddr + MID_0H); 868 iowrite16(adrp[2], ioaddr + MID_0H);
865 869
866 /* Promiscous Mode */
867 spin_lock_irqsave(&lp->lock, flags);
868
869 /* Clear AMCP & PROM bits */ 870 /* Clear AMCP & PROM bits */
870 reg = ioread16(ioaddr) & ~0x0120; 871 lp->mcr0 = ioread16(ioaddr + MCR0) & ~(MCR0_PROMISC | MCR0_HASH_EN);
871 if (dev->flags & IFF_PROMISC) {
872 reg |= 0x0020;
873 lp->mcr0 |= 0x0020;
874 }
875 /* Too many multicast addresses
876 * accept all traffic */
877 else if ((netdev_mc_count(dev) > MCAST_MAX) ||
878 (dev->flags & IFF_ALLMULTI))
879 reg |= 0x0020;
880 872
881 iowrite16(reg, ioaddr); 873 /* Promiscuous mode */
882 spin_unlock_irqrestore(&lp->lock, flags); 874 if (dev->flags & IFF_PROMISC)
875 lp->mcr0 |= MCR0_PROMISC;
883 876
884 /* Build the hash table */ 877 /* Enable multicast hash table function to
885 if (netdev_mc_count(dev) > MCAST_MAX) { 878 * receive all multicast packets. */
886 u16 hash_table[4]; 879 else if (dev->flags & IFF_ALLMULTI) {
887 u32 crc; 880 lp->mcr0 |= MCR0_HASH_EN;
888 881
889 for (i = 0; i < 4; i++) 882 for (i = 0; i < MCAST_MAX ; i++) {
890 hash_table[i] = 0; 883 iowrite16(0, ioaddr + MID_1L + 8 * i);
884 iowrite16(0, ioaddr + MID_1M + 8 * i);
885 iowrite16(0, ioaddr + MID_1H + 8 * i);
886 }
891 887
888 for (i = 0; i < 4; i++)
889 hash_table[i] = 0xffff;
890 }
891 /* Use internal multicast address registers if the number of
892 * multicast addresses is not greater than MCAST_MAX. */
893 else if (netdev_mc_count(dev) <= MCAST_MAX) {
894 i = 0;
892 netdev_for_each_mc_addr(ha, dev) { 895 netdev_for_each_mc_addr(ha, dev) {
893 char *addrs = ha->addr; 896 u16 *adrp = (u16 *) ha->addr;
897 iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
898 iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
899 iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
900 i++;
901 }
902 while (i < MCAST_MAX) {
903 iowrite16(0, ioaddr + MID_1L + 8 * i);
904 iowrite16(0, ioaddr + MID_1M + 8 * i);
905 iowrite16(0, ioaddr + MID_1H + 8 * i);
906 i++;
907 }
908 }
909 /* Otherwise, Enable multicast hash table function. */
910 else {
911 u32 crc;
894 912
895 if (!(*addrs & 1)) 913 lp->mcr0 |= MCR0_HASH_EN;
896 continue; 914
915 for (i = 0; i < MCAST_MAX ; i++) {
916 iowrite16(0, ioaddr + MID_1L + 8 * i);
917 iowrite16(0, ioaddr + MID_1M + 8 * i);
918 iowrite16(0, ioaddr + MID_1H + 8 * i);
919 }
897 920
898 crc = ether_crc_le(6, addrs); 921 /* Build multicast hash table */
922 netdev_for_each_mc_addr(ha, dev) {
923 u8 *addrs = ha->addr;
924
925 crc = ether_crc(ETH_ALEN, addrs);
899 crc >>= 26; 926 crc >>= 26;
900 hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf)); 927 hash_table[crc >> 4] |= 1 << (crc & 0xf);
901 } 928 }
902 /* Fill the MAC hash tables with their values */ 929 }
930
931 iowrite16(lp->mcr0, ioaddr + MCR0);
932
933 /* Fill the MAC hash tables with their values */
934 if (lp->mcr0 && MCR0_HASH_EN) {
903 iowrite16(hash_table[0], ioaddr + MAR0); 935 iowrite16(hash_table[0], ioaddr + MAR0);
904 iowrite16(hash_table[1], ioaddr + MAR1); 936 iowrite16(hash_table[1], ioaddr + MAR1);
905 iowrite16(hash_table[2], ioaddr + MAR2); 937 iowrite16(hash_table[2], ioaddr + MAR2);
906 iowrite16(hash_table[3], ioaddr + MAR3); 938 iowrite16(hash_table[3], ioaddr + MAR3);
907 } 939 }
908 /* Multicast Address 1~4 case */ 940
909 i = 0; 941 spin_unlock_irqrestore(&lp->lock, flags);
910 netdev_for_each_mc_addr(ha, dev) {
911 if (i >= MCAST_MAX)
912 break;
913 adrp = (u16 *) ha->addr;
914 iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
915 iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
916 iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
917 i++;
918 }
919 while (i < MCAST_MAX) {
920 iowrite16(0xffff, ioaddr + MID_1L + 8 * i);
921 iowrite16(0xffff, ioaddr + MID_1M + 8 * i);
922 iowrite16(0xffff, ioaddr + MID_1H + 8 * i);
923 i++;
924 }
925} 942}
926 943
927static void netdev_get_drvinfo(struct net_device *dev, 944static void netdev_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 469ab0b7ce31..7ffdb80adf40 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -25,6 +25,7 @@
25#include <linux/dma-mapping.h> 25#include <linux/dma-mapping.h>
26#include <linux/pm_runtime.h> 26#include <linux/pm_runtime.h>
27#include <linux/firmware.h> 27#include <linux/firmware.h>
28#include <linux/pci-aspm.h>
28 29
29#include <asm/system.h> 30#include <asm/system.h>
30#include <asm/io.h> 31#include <asm/io.h>
@@ -617,8 +618,9 @@ static void ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg, u32 data)
617 } 618 }
618} 619}
619 620
620static void rtl8168_oob_notify(void __iomem *ioaddr, u8 cmd) 621static void rtl8168_oob_notify(struct rtl8169_private *tp, u8 cmd)
621{ 622{
623 void __iomem *ioaddr = tp->mmio_addr;
622 int i; 624 int i;
623 625
624 RTL_W8(ERIDR, cmd); 626 RTL_W8(ERIDR, cmd);
@@ -630,7 +632,7 @@ static void rtl8168_oob_notify(void __iomem *ioaddr, u8 cmd)
630 break; 632 break;
631 } 633 }
632 634
633 ocp_write(ioaddr, 0x1, 0x30, 0x00000001); 635 ocp_write(tp, 0x1, 0x30, 0x00000001);
634} 636}
635 637
636#define OOB_CMD_RESET 0x00 638#define OOB_CMD_RESET 0x00
@@ -2868,8 +2870,11 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
2868{ 2870{
2869 void __iomem *ioaddr = tp->mmio_addr; 2871 void __iomem *ioaddr = tp->mmio_addr;
2870 2872
2871 if (tp->mac_version == RTL_GIGA_MAC_VER_27) 2873 if (((tp->mac_version == RTL_GIGA_MAC_VER_27) ||
2874 (tp->mac_version == RTL_GIGA_MAC_VER_28)) &&
2875 (ocp_read(tp, 0x0f, 0x0010) & 0x00008000)) {
2872 return; 2876 return;
2877 }
2873 2878
2874 if (((tp->mac_version == RTL_GIGA_MAC_VER_23) || 2879 if (((tp->mac_version == RTL_GIGA_MAC_VER_23) ||
2875 (tp->mac_version == RTL_GIGA_MAC_VER_24)) && 2880 (tp->mac_version == RTL_GIGA_MAC_VER_24)) &&
@@ -2891,6 +2896,8 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
2891 switch (tp->mac_version) { 2896 switch (tp->mac_version) {
2892 case RTL_GIGA_MAC_VER_25: 2897 case RTL_GIGA_MAC_VER_25:
2893 case RTL_GIGA_MAC_VER_26: 2898 case RTL_GIGA_MAC_VER_26:
2899 case RTL_GIGA_MAC_VER_27:
2900 case RTL_GIGA_MAC_VER_28:
2894 RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80); 2901 RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
2895 break; 2902 break;
2896 } 2903 }
@@ -2900,12 +2907,17 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
2900{ 2907{
2901 void __iomem *ioaddr = tp->mmio_addr; 2908 void __iomem *ioaddr = tp->mmio_addr;
2902 2909
2903 if (tp->mac_version == RTL_GIGA_MAC_VER_27) 2910 if (((tp->mac_version == RTL_GIGA_MAC_VER_27) ||
2911 (tp->mac_version == RTL_GIGA_MAC_VER_28)) &&
2912 (ocp_read(tp, 0x0f, 0x0010) & 0x00008000)) {
2904 return; 2913 return;
2914 }
2905 2915
2906 switch (tp->mac_version) { 2916 switch (tp->mac_version) {
2907 case RTL_GIGA_MAC_VER_25: 2917 case RTL_GIGA_MAC_VER_25:
2908 case RTL_GIGA_MAC_VER_26: 2918 case RTL_GIGA_MAC_VER_26:
2919 case RTL_GIGA_MAC_VER_27:
2920 case RTL_GIGA_MAC_VER_28:
2909 RTL_W8(PMCH, RTL_R8(PMCH) | 0x80); 2921 RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
2910 break; 2922 break;
2911 } 2923 }
@@ -3009,6 +3021,11 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3009 mii->reg_num_mask = 0x1f; 3021 mii->reg_num_mask = 0x1f;
3010 mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII); 3022 mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
3011 3023
3024 /* disable ASPM completely as that cause random device stop working
3025 * problems as well as full system hangs for some PCIe devices users */
3026 pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
3027 PCIE_LINK_STATE_CLKPM);
3028
3012 /* enable device (incl. PCI PM wakeup and hotplug setup) */ 3029 /* enable device (incl. PCI PM wakeup and hotplug setup) */
3013 rc = pci_enable_device(pdev); 3030 rc = pci_enable_device(pdev);
3014 if (rc < 0) { 3031 if (rc < 0) {
@@ -3042,7 +3059,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
3042 goto err_out_mwi_2; 3059 goto err_out_mwi_2;
3043 } 3060 }
3044 3061
3045 tp->cp_cmd = PCIMulRW | RxChkSum; 3062 tp->cp_cmd = RxChkSum;
3046 3063
3047 if ((sizeof(dma_addr_t) > 4) && 3064 if ((sizeof(dma_addr_t) > 4) &&
3048 !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && use_dac) { 3065 !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && use_dac) {
@@ -3318,7 +3335,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
3318 /* Disable interrupts */ 3335 /* Disable interrupts */
3319 rtl8169_irq_mask_and_ack(ioaddr); 3336 rtl8169_irq_mask_and_ack(ioaddr);
3320 3337
3321 if (tp->mac_version == RTL_GIGA_MAC_VER_28) { 3338 if (tp->mac_version == RTL_GIGA_MAC_VER_27 ||
3339 tp->mac_version == RTL_GIGA_MAC_VER_28) {
3322 while (RTL_R8(TxPoll) & NPQ) 3340 while (RTL_R8(TxPoll) & NPQ)
3323 udelay(20); 3341 udelay(20);
3324 3342
@@ -3847,8 +3865,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
3847 Cxpl_dbg_sel | \ 3865 Cxpl_dbg_sel | \
3848 ASF | \ 3866 ASF | \
3849 PktCntrDisable | \ 3867 PktCntrDisable | \
3850 PCIDAC | \ 3868 Mac_dbgo_sel)
3851 PCIMulRW)
3852 3869
3853static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev) 3870static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev)
3854{ 3871{
@@ -3878,8 +3895,6 @@ static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev)
3878 if ((cfg1 & LEDS0) && (cfg1 & LEDS1)) 3895 if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
3879 RTL_W8(Config1, cfg1 & ~LEDS0); 3896 RTL_W8(Config1, cfg1 & ~LEDS0);
3880 3897
3881 RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK);
3882
3883 rtl_ephy_init(ioaddr, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1)); 3898 rtl_ephy_init(ioaddr, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
3884} 3899}
3885 3900
@@ -3891,8 +3906,6 @@ static void rtl_hw_start_8102e_2(void __iomem *ioaddr, struct pci_dev *pdev)
3891 3906
3892 RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable); 3907 RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
3893 RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en); 3908 RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
3894
3895 RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK);
3896} 3909}
3897 3910
3898static void rtl_hw_start_8102e_3(void __iomem *ioaddr, struct pci_dev *pdev) 3911static void rtl_hw_start_8102e_3(void __iomem *ioaddr, struct pci_dev *pdev)
@@ -3918,6 +3931,8 @@ static void rtl_hw_start_8101(struct net_device *dev)
3918 } 3931 }
3919 } 3932 }
3920 3933
3934 RTL_W8(Cfg9346, Cfg9346_Unlock);
3935
3921 switch (tp->mac_version) { 3936 switch (tp->mac_version) {
3922 case RTL_GIGA_MAC_VER_07: 3937 case RTL_GIGA_MAC_VER_07:
3923 rtl_hw_start_8102e_1(ioaddr, pdev); 3938 rtl_hw_start_8102e_1(ioaddr, pdev);
@@ -3932,14 +3947,13 @@ static void rtl_hw_start_8101(struct net_device *dev)
3932 break; 3947 break;
3933 } 3948 }
3934 3949
3935 RTL_W8(Cfg9346, Cfg9346_Unlock); 3950 RTL_W8(Cfg9346, Cfg9346_Lock);
3936 3951
3937 RTL_W8(MaxTxPacketSize, TxPacketMax); 3952 RTL_W8(MaxTxPacketSize, TxPacketMax);
3938 3953
3939 rtl_set_rx_max_size(ioaddr, rx_buf_sz); 3954 rtl_set_rx_max_size(ioaddr, rx_buf_sz);
3940 3955
3941 tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW; 3956 tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
3942
3943 RTL_W16(CPlusCmd, tp->cp_cmd); 3957 RTL_W16(CPlusCmd, tp->cp_cmd);
3944 3958
3945 RTL_W16(IntrMitigate, 0x0000); 3959 RTL_W16(IntrMitigate, 0x0000);
@@ -3949,14 +3963,10 @@ static void rtl_hw_start_8101(struct net_device *dev)
3949 RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); 3963 RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
3950 rtl_set_rx_tx_config_registers(tp); 3964 rtl_set_rx_tx_config_registers(tp);
3951 3965
3952 RTL_W8(Cfg9346, Cfg9346_Lock);
3953
3954 RTL_R8(IntrMask); 3966 RTL_R8(IntrMask);
3955 3967
3956 rtl_set_rx_mode(dev); 3968 rtl_set_rx_mode(dev);
3957 3969
3958 RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
3959
3960 RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000); 3970 RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
3961 3971
3962 RTL_W16(IntrMask, tp->intr_event); 3972 RTL_W16(IntrMask, tp->intr_event);
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index 0e8bb19ed60d..ca886d98bdc7 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -569,9 +569,14 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
569 struct ethtool_test *test, u64 *data) 569 struct ethtool_test *test, u64 *data)
570{ 570{
571 struct efx_nic *efx = netdev_priv(net_dev); 571 struct efx_nic *efx = netdev_priv(net_dev);
572 struct efx_self_tests efx_tests; 572 struct efx_self_tests *efx_tests;
573 int already_up; 573 int already_up;
574 int rc; 574 int rc = -ENOMEM;
575
576 efx_tests = kzalloc(sizeof(*efx_tests), GFP_KERNEL);
577 if (!efx_tests)
578 goto fail;
579
575 580
576 ASSERT_RTNL(); 581 ASSERT_RTNL();
577 if (efx->state != STATE_RUNNING) { 582 if (efx->state != STATE_RUNNING) {
@@ -589,13 +594,11 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
589 if (rc) { 594 if (rc) {
590 netif_err(efx, drv, efx->net_dev, 595 netif_err(efx, drv, efx->net_dev,
591 "failed opening device.\n"); 596 "failed opening device.\n");
592 goto fail2; 597 goto fail1;
593 } 598 }
594 } 599 }
595 600
596 memset(&efx_tests, 0, sizeof(efx_tests)); 601 rc = efx_selftest(efx, efx_tests, test->flags);
597
598 rc = efx_selftest(efx, &efx_tests, test->flags);
599 602
600 if (!already_up) 603 if (!already_up)
601 dev_close(efx->net_dev); 604 dev_close(efx->net_dev);
@@ -604,10 +607,11 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
604 rc == 0 ? "passed" : "failed", 607 rc == 0 ? "passed" : "failed",
605 (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on"); 608 (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
606 609
607 fail2: 610fail1:
608 fail1:
609 /* Fill ethtool results structures */ 611 /* Fill ethtool results structures */
610 efx_ethtool_fill_self_tests(efx, &efx_tests, NULL, data); 612 efx_ethtool_fill_self_tests(efx, efx_tests, NULL, data);
613 kfree(efx_tests);
614fail:
611 if (rc) 615 if (rc)
612 test->flags |= ETH_TEST_FL_FAILED; 616 test->flags |= ETH_TEST_FL_FAILED;
613} 617}
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 42daf98ba736..35b28f42d208 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -3856,9 +3856,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
3856 memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN); 3856 memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN);
3857 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); 3857 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
3858 3858
3859 /* device is off until link detection */
3860 netif_carrier_off(dev);
3861
3862 return dev; 3859 return dev;
3863} 3860}
3864 3861
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 64bfdae5956f..d70bde95460b 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1178,6 +1178,11 @@ static int smsc911x_open(struct net_device *dev)
1178 smsc911x_reg_write(pdata, HW_CFG, 0x00050000); 1178 smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
1179 smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740); 1179 smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740);
1180 1180
1181 /* Increase the legal frame size of VLAN tagged frames to 1522 bytes */
1182 spin_lock_irq(&pdata->mac_lock);
1183 smsc911x_mac_write(pdata, VLAN1, ETH_P_8021Q);
1184 spin_unlock_irq(&pdata->mac_lock);
1185
1181 /* Make sure EEPROM has finished loading before setting GPIO_CFG */ 1186 /* Make sure EEPROM has finished loading before setting GPIO_CFG */
1182 timeout = 50; 1187 timeout = 50;
1183 while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) && 1188 while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) &&
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 02b622e3b9fb..5002f5be47be 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -651,6 +651,10 @@ static const struct usb_device_id products[] = {
651 .driver_info = (unsigned long)&dm9601_info, 651 .driver_info = (unsigned long)&dm9601_info,
652 }, 652 },
653 { 653 {
654 USB_DEVICE(0x0fe6, 0x9700), /* DM9601 USB to Fast Ethernet Adapter */
655 .driver_info = (unsigned long)&dm9601_info,
656 },
657 {
654 USB_DEVICE(0x0a46, 0x9000), /* DM9000E */ 658 USB_DEVICE(0x0a46, 0x9000), /* DM9000E */
655 .driver_info = (unsigned long)&dm9601_info, 659 .driver_info = (unsigned long)&dm9601_info,
656 }, 660 },
diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c
index 78c26fdccad1..62ce2f4e8605 100644
--- a/drivers/net/wireless/ath/ath5k/phy.c
+++ b/drivers/net/wireless/ath/ath5k/phy.c
@@ -282,6 +282,34 @@ int ath5k_hw_phy_disable(struct ath5k_hw *ah)
282 return 0; 282 return 0;
283} 283}
284 284
285/*
286 * Wait for synth to settle
287 */
288static void ath5k_hw_wait_for_synth(struct ath5k_hw *ah,
289 struct ieee80211_channel *channel)
290{
291 /*
292 * On 5211+ read activation -> rx delay
293 * and use it (100ns steps).
294 */
295 if (ah->ah_version != AR5K_AR5210) {
296 u32 delay;
297 delay = ath5k_hw_reg_read(ah, AR5K_PHY_RX_DELAY) &
298 AR5K_PHY_RX_DELAY_M;
299 delay = (channel->hw_value & CHANNEL_CCK) ?
300 ((delay << 2) / 22) : (delay / 10);
301 if (ah->ah_bwmode == AR5K_BWMODE_10MHZ)
302 delay = delay << 1;
303 if (ah->ah_bwmode == AR5K_BWMODE_5MHZ)
304 delay = delay << 2;
305 /* XXX: /2 on turbo ? Let's be safe
306 * for now */
307 udelay(100 + delay);
308 } else {
309 mdelay(1);
310 }
311}
312
285 313
286/**********************\ 314/**********************\
287* RF Gain optimization * 315* RF Gain optimization *
@@ -1253,6 +1281,7 @@ static int ath5k_hw_channel(struct ath5k_hw *ah,
1253 case AR5K_RF5111: 1281 case AR5K_RF5111:
1254 ret = ath5k_hw_rf5111_channel(ah, channel); 1282 ret = ath5k_hw_rf5111_channel(ah, channel);
1255 break; 1283 break;
1284 case AR5K_RF2317:
1256 case AR5K_RF2425: 1285 case AR5K_RF2425:
1257 ret = ath5k_hw_rf2425_channel(ah, channel); 1286 ret = ath5k_hw_rf2425_channel(ah, channel);
1258 break; 1287 break;
@@ -3237,6 +3266,13 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
3237 /* Failed */ 3266 /* Failed */
3238 if (i >= 100) 3267 if (i >= 100)
3239 return -EIO; 3268 return -EIO;
3269
3270 /* Set channel and wait for synth */
3271 ret = ath5k_hw_channel(ah, channel);
3272 if (ret)
3273 return ret;
3274
3275 ath5k_hw_wait_for_synth(ah, channel);
3240 } 3276 }
3241 3277
3242 /* 3278 /*
@@ -3251,13 +3287,53 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
3251 if (ret) 3287 if (ret)
3252 return ret; 3288 return ret;
3253 3289
3290 /* Write OFDM timings on 5212*/
3291 if (ah->ah_version == AR5K_AR5212 &&
3292 channel->hw_value & CHANNEL_OFDM) {
3293
3294 ret = ath5k_hw_write_ofdm_timings(ah, channel);
3295 if (ret)
3296 return ret;
3297
3298 /* Spur info is available only from EEPROM versions
3299 * greater than 5.3, but the EEPROM routines will use
3300 * static values for older versions */
3301 if (ah->ah_mac_srev >= AR5K_SREV_AR5424)
3302 ath5k_hw_set_spur_mitigation_filter(ah,
3303 channel);
3304 }
3305
3306 /* If we used fast channel switching
3307 * we are done, release RF bus and
3308 * fire up NF calibration.
3309 *
3310 * Note: Only NF calibration due to
3311 * channel change, not AGC calibration
3312 * since AGC is still running !
3313 */
3314 if (fast) {
3315 /*
3316 * Release RF Bus grant
3317 */
3318 AR5K_REG_DISABLE_BITS(ah, AR5K_PHY_RFBUS_REQ,
3319 AR5K_PHY_RFBUS_REQ_REQUEST);
3320
3321 /*
3322 * Start NF calibration
3323 */
3324 AR5K_REG_ENABLE_BITS(ah, AR5K_PHY_AGCCTL,
3325 AR5K_PHY_AGCCTL_NF);
3326
3327 return ret;
3328 }
3329
3254 /* 3330 /*
3255 * For 5210 we do all initialization using 3331 * For 5210 we do all initialization using
3256 * initvals, so we don't have to modify 3332 * initvals, so we don't have to modify
3257 * any settings (5210 also only supports 3333 * any settings (5210 also only supports
3258 * a/aturbo modes) 3334 * a/aturbo modes)
3259 */ 3335 */
3260 if ((ah->ah_version != AR5K_AR5210) && !fast) { 3336 if (ah->ah_version != AR5K_AR5210) {
3261 3337
3262 /* 3338 /*
3263 * Write initial RF gain settings 3339 * Write initial RF gain settings
@@ -3276,22 +3352,6 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
3276 if (ret) 3352 if (ret)
3277 return ret; 3353 return ret;
3278 3354
3279 /* Write OFDM timings on 5212*/
3280 if (ah->ah_version == AR5K_AR5212 &&
3281 channel->hw_value & CHANNEL_OFDM) {
3282
3283 ret = ath5k_hw_write_ofdm_timings(ah, channel);
3284 if (ret)
3285 return ret;
3286
3287 /* Spur info is available only from EEPROM versions
3288 * greater than 5.3, but the EEPROM routines will use
3289 * static values for older versions */
3290 if (ah->ah_mac_srev >= AR5K_SREV_AR5424)
3291 ath5k_hw_set_spur_mitigation_filter(ah,
3292 channel);
3293 }
3294
3295 /*Enable/disable 802.11b mode on 5111 3355 /*Enable/disable 802.11b mode on 5111
3296 (enable 2111 frequency converter + CCK)*/ 3356 (enable 2111 frequency converter + CCK)*/
3297 if (ah->ah_radio == AR5K_RF5111) { 3357 if (ah->ah_radio == AR5K_RF5111) {
@@ -3322,47 +3382,20 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
3322 */ 3382 */
3323 ath5k_hw_reg_write(ah, AR5K_PHY_ACT_ENABLE, AR5K_PHY_ACT); 3383 ath5k_hw_reg_write(ah, AR5K_PHY_ACT_ENABLE, AR5K_PHY_ACT);
3324 3384
3385 ath5k_hw_wait_for_synth(ah, channel);
3386
3325 /* 3387 /*
3326 * On 5211+ read activation -> rx delay 3388 * Perform ADC test to see if baseband is ready
3327 * and use it. 3389 * Set tx hold and check adc test register
3328 */ 3390 */
3329 if (ah->ah_version != AR5K_AR5210) { 3391 phy_tst1 = ath5k_hw_reg_read(ah, AR5K_PHY_TST1);
3330 u32 delay; 3392 ath5k_hw_reg_write(ah, AR5K_PHY_TST1_TXHOLD, AR5K_PHY_TST1);
3331 delay = ath5k_hw_reg_read(ah, AR5K_PHY_RX_DELAY) & 3393 for (i = 0; i <= 20; i++) {
3332 AR5K_PHY_RX_DELAY_M; 3394 if (!(ath5k_hw_reg_read(ah, AR5K_PHY_ADC_TEST) & 0x10))
3333 delay = (channel->hw_value & CHANNEL_CCK) ? 3395 break;
3334 ((delay << 2) / 22) : (delay / 10); 3396 udelay(200);
3335 if (ah->ah_bwmode == AR5K_BWMODE_10MHZ)
3336 delay = delay << 1;
3337 if (ah->ah_bwmode == AR5K_BWMODE_5MHZ)
3338 delay = delay << 2;
3339 /* XXX: /2 on turbo ? Let's be safe
3340 * for now */
3341 udelay(100 + delay);
3342 } else {
3343 mdelay(1);
3344 }
3345
3346 if (fast)
3347 /*
3348 * Release RF Bus grant
3349 */
3350 AR5K_REG_DISABLE_BITS(ah, AR5K_PHY_RFBUS_REQ,
3351 AR5K_PHY_RFBUS_REQ_REQUEST);
3352 else {
3353 /*
3354 * Perform ADC test to see if baseband is ready
3355 * Set tx hold and check adc test register
3356 */
3357 phy_tst1 = ath5k_hw_reg_read(ah, AR5K_PHY_TST1);
3358 ath5k_hw_reg_write(ah, AR5K_PHY_TST1_TXHOLD, AR5K_PHY_TST1);
3359 for (i = 0; i <= 20; i++) {
3360 if (!(ath5k_hw_reg_read(ah, AR5K_PHY_ADC_TEST) & 0x10))
3361 break;
3362 udelay(200);
3363 }
3364 ath5k_hw_reg_write(ah, phy_tst1, AR5K_PHY_TST1);
3365 } 3397 }
3398 ath5k_hw_reg_write(ah, phy_tst1, AR5K_PHY_TST1);
3366 3399
3367 /* 3400 /*
3368 * Start automatic gain control calibration 3401 * Start automatic gain control calibration
diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index 23838e37d45f..1a7fa6ea4cf5 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -21,7 +21,6 @@
21#include <linux/device.h> 21#include <linux/device.h>
22#include <linux/leds.h> 22#include <linux/leds.h>
23#include <linux/completion.h> 23#include <linux/completion.h>
24#include <linux/pm_qos_params.h>
25 24
26#include "debug.h" 25#include "debug.h"
27#include "common.h" 26#include "common.h"
@@ -57,8 +56,6 @@ struct ath_node;
57 56
58#define A_MAX(a, b) ((a) > (b) ? (a) : (b)) 57#define A_MAX(a, b) ((a) > (b) ? (a) : (b))
59 58
60#define ATH9K_PM_QOS_DEFAULT_VALUE 55
61
62#define TSF_TO_TU(_h,_l) \ 59#define TSF_TO_TU(_h,_l) \
63 ((((u32)(_h)) << 22) | (((u32)(_l)) >> 10)) 60 ((((u32)(_h)) << 22) | (((u32)(_l)) >> 10))
64 61
@@ -633,8 +630,6 @@ struct ath_softc {
633 struct ath_descdma txsdma; 630 struct ath_descdma txsdma;
634 631
635 struct ath_ant_comb ant_comb; 632 struct ath_ant_comb ant_comb;
636
637 struct pm_qos_request_list pm_qos_req;
638}; 633};
639 634
640struct ath_wiphy { 635struct ath_wiphy {
@@ -666,7 +661,6 @@ static inline void ath_read_cachesize(struct ath_common *common, int *csz)
666extern struct ieee80211_ops ath9k_ops; 661extern struct ieee80211_ops ath9k_ops;
667extern int ath9k_modparam_nohwcrypt; 662extern int ath9k_modparam_nohwcrypt;
668extern int led_blink; 663extern int led_blink;
669extern int ath9k_pm_qos_value;
670extern bool is_ath9k_unloaded; 664extern bool is_ath9k_unloaded;
671 665
672irqreturn_t ath_isr(int irq, void *dev); 666irqreturn_t ath_isr(int irq, void *dev);
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 5ab3084eb9cb..07b1633b7f3f 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -219,8 +219,9 @@ static int __hif_usb_tx(struct hif_device_usb *hif_dev)
219 struct tx_buf *tx_buf = NULL; 219 struct tx_buf *tx_buf = NULL;
220 struct sk_buff *nskb = NULL; 220 struct sk_buff *nskb = NULL;
221 int ret = 0, i; 221 int ret = 0, i;
222 u16 *hdr, tx_skb_cnt = 0; 222 u16 tx_skb_cnt = 0;
223 u8 *buf; 223 u8 *buf;
224 __le16 *hdr;
224 225
225 if (hif_dev->tx.tx_skb_cnt == 0) 226 if (hif_dev->tx.tx_skb_cnt == 0)
226 return 0; 227 return 0;
@@ -245,9 +246,9 @@ static int __hif_usb_tx(struct hif_device_usb *hif_dev)
245 246
246 buf = tx_buf->buf; 247 buf = tx_buf->buf;
247 buf += tx_buf->offset; 248 buf += tx_buf->offset;
248 hdr = (u16 *)buf; 249 hdr = (__le16 *)buf;
249 *hdr++ = nskb->len; 250 *hdr++ = cpu_to_le16(nskb->len);
250 *hdr++ = ATH_USB_TX_STREAM_MODE_TAG; 251 *hdr++ = cpu_to_le16(ATH_USB_TX_STREAM_MODE_TAG);
251 buf += 4; 252 buf += 4;
252 memcpy(buf, nskb->data, nskb->len); 253 memcpy(buf, nskb->data, nskb->len);
253 tx_buf->len = nskb->len + 4; 254 tx_buf->len = nskb->len + 4;
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 087a6a95edd5..a033d01bf8a0 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -41,10 +41,6 @@ static int ath9k_btcoex_enable;
41module_param_named(btcoex_enable, ath9k_btcoex_enable, int, 0444); 41module_param_named(btcoex_enable, ath9k_btcoex_enable, int, 0444);
42MODULE_PARM_DESC(btcoex_enable, "Enable wifi-BT coexistence"); 42MODULE_PARM_DESC(btcoex_enable, "Enable wifi-BT coexistence");
43 43
44int ath9k_pm_qos_value = ATH9K_PM_QOS_DEFAULT_VALUE;
45module_param_named(pmqos, ath9k_pm_qos_value, int, S_IRUSR | S_IRGRP | S_IROTH);
46MODULE_PARM_DESC(pmqos, "User specified PM-QOS value");
47
48bool is_ath9k_unloaded; 44bool is_ath9k_unloaded;
49/* We use the hw_value as an index into our private channel structure */ 45/* We use the hw_value as an index into our private channel structure */
50 46
@@ -762,9 +758,6 @@ int ath9k_init_device(u16 devid, struct ath_softc *sc, u16 subsysid,
762 ath_init_leds(sc); 758 ath_init_leds(sc);
763 ath_start_rfkill_poll(sc); 759 ath_start_rfkill_poll(sc);
764 760
765 pm_qos_add_request(&sc->pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
766 PM_QOS_DEFAULT_VALUE);
767
768 return 0; 761 return 0;
769 762
770error_world: 763error_world:
@@ -831,7 +824,6 @@ void ath9k_deinit_device(struct ath_softc *sc)
831 } 824 }
832 825
833 ieee80211_unregister_hw(hw); 826 ieee80211_unregister_hw(hw);
834 pm_qos_remove_request(&sc->pm_qos_req);
835 ath_rx_cleanup(sc); 827 ath_rx_cleanup(sc);
836 ath_tx_cleanup(sc); 828 ath_tx_cleanup(sc);
837 ath9k_deinit_softc(sc); 829 ath9k_deinit_softc(sc);
diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c
index 180170d3ce25..2915b11edefb 100644
--- a/drivers/net/wireless/ath/ath9k/mac.c
+++ b/drivers/net/wireless/ath/ath9k/mac.c
@@ -885,7 +885,7 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah, enum ath9k_int ints)
885 struct ath_common *common = ath9k_hw_common(ah); 885 struct ath_common *common = ath9k_hw_common(ah);
886 886
887 if (!(ints & ATH9K_INT_GLOBAL)) 887 if (!(ints & ATH9K_INT_GLOBAL))
888 ath9k_hw_enable_interrupts(ah); 888 ath9k_hw_disable_interrupts(ah);
889 889
890 ath_dbg(common, ATH_DBG_INTERRUPT, "0x%x => 0x%x\n", omask, ints); 890 ath_dbg(common, ATH_DBG_INTERRUPT, "0x%x => 0x%x\n", omask, ints);
891 891
@@ -963,7 +963,8 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah, enum ath9k_int ints)
963 REG_CLR_BIT(ah, AR_IMR_S5, AR_IMR_S5_TIM_TIMER); 963 REG_CLR_BIT(ah, AR_IMR_S5, AR_IMR_S5_TIM_TIMER);
964 } 964 }
965 965
966 ath9k_hw_enable_interrupts(ah); 966 if (ints & ATH9K_INT_GLOBAL)
967 ath9k_hw_enable_interrupts(ah);
967 968
968 return; 969 return;
969} 970}
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index da5c64597c1f..a09d15f7aa6e 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1173,12 +1173,6 @@ static int ath9k_start(struct ieee80211_hw *hw)
1173 ath9k_btcoex_timer_resume(sc); 1173 ath9k_btcoex_timer_resume(sc);
1174 } 1174 }
1175 1175
1176 /* User has the option to provide pm-qos value as a module
1177 * parameter rather than using the default value of
1178 * 'ATH9K_PM_QOS_DEFAULT_VALUE'.
1179 */
1180 pm_qos_update_request(&sc->pm_qos_req, ath9k_pm_qos_value);
1181
1182 if (ah->caps.pcie_lcr_extsync_en && common->bus_ops->extn_synch_en) 1176 if (ah->caps.pcie_lcr_extsync_en && common->bus_ops->extn_synch_en)
1183 common->bus_ops->extn_synch_en(common); 1177 common->bus_ops->extn_synch_en(common);
1184 1178
@@ -1345,8 +1339,6 @@ static void ath9k_stop(struct ieee80211_hw *hw)
1345 1339
1346 sc->sc_flags |= SC_OP_INVALID; 1340 sc->sc_flags |= SC_OP_INVALID;
1347 1341
1348 pm_qos_update_request(&sc->pm_qos_req, PM_QOS_DEFAULT_VALUE);
1349
1350 mutex_unlock(&sc->mutex); 1342 mutex_unlock(&sc->mutex);
1351 1343
1352 ath_dbg(common, ATH_DBG_CONFIG, "Driver halt\n"); 1344 ath_dbg(common, ATH_DBG_CONFIG, "Driver halt\n");
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index 537732e5964f..f82c400be288 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -118,6 +118,8 @@ static struct usb_device_id carl9170_usb_ids[] = {
118 { USB_DEVICE(0x057c, 0x8402) }, 118 { USB_DEVICE(0x057c, 0x8402) },
119 /* Qwest/Actiontec 802AIN Wireless N USB Network Adapter */ 119 /* Qwest/Actiontec 802AIN Wireless N USB Network Adapter */
120 { USB_DEVICE(0x1668, 0x1200) }, 120 { USB_DEVICE(0x1668, 0x1200) },
121 /* Airlive X.USB a/b/g/n */
122 { USB_DEVICE(0x1b75, 0x9170) },
121 123
122 /* terminate */ 124 /* terminate */
123 {} 125 {}
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index 79ab0a6b1386..537fb8c84e3a 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -51,7 +51,7 @@
51#include "iwl-agn-debugfs.h" 51#include "iwl-agn-debugfs.h"
52 52
53/* Highest firmware API version supported */ 53/* Highest firmware API version supported */
54#define IWL5000_UCODE_API_MAX 2 54#define IWL5000_UCODE_API_MAX 5
55#define IWL5150_UCODE_API_MAX 2 55#define IWL5150_UCODE_API_MAX 2
56 56
57/* Lowest firmware API version supported */ 57/* Lowest firmware API version supported */
diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
index 1eacba4daa5b..0494d7b102d4 100644
--- a/drivers/net/wireless/p54/p54pci.c
+++ b/drivers/net/wireless/p54/p54pci.c
@@ -199,6 +199,7 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
199 while (i != idx) { 199 while (i != idx) {
200 u16 len; 200 u16 len;
201 struct sk_buff *skb; 201 struct sk_buff *skb;
202 dma_addr_t dma_addr;
202 desc = &ring[i]; 203 desc = &ring[i];
203 len = le16_to_cpu(desc->len); 204 len = le16_to_cpu(desc->len);
204 skb = rx_buf[i]; 205 skb = rx_buf[i];
@@ -216,17 +217,20 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
216 217
217 len = priv->common.rx_mtu; 218 len = priv->common.rx_mtu;
218 } 219 }
220 dma_addr = le32_to_cpu(desc->host_addr);
221 pci_dma_sync_single_for_cpu(priv->pdev, dma_addr,
222 priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
219 skb_put(skb, len); 223 skb_put(skb, len);
220 224
221 if (p54_rx(dev, skb)) { 225 if (p54_rx(dev, skb)) {
222 pci_unmap_single(priv->pdev, 226 pci_unmap_single(priv->pdev, dma_addr,
223 le32_to_cpu(desc->host_addr), 227 priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
224 priv->common.rx_mtu + 32,
225 PCI_DMA_FROMDEVICE);
226 rx_buf[i] = NULL; 228 rx_buf[i] = NULL;
227 desc->host_addr = 0; 229 desc->host_addr = cpu_to_le32(0);
228 } else { 230 } else {
229 skb_trim(skb, 0); 231 skb_trim(skb, 0);
232 pci_dma_sync_single_for_device(priv->pdev, dma_addr,
233 priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
230 desc->len = cpu_to_le16(priv->common.rx_mtu + 32); 234 desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
231 } 235 }
232 236
diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index 21713a7638c4..9b344a921e74 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -98,6 +98,7 @@ static struct usb_device_id p54u_table[] __devinitdata = {
98 {USB_DEVICE(0x1413, 0x5400)}, /* Telsey 802.11g USB2.0 Adapter */ 98 {USB_DEVICE(0x1413, 0x5400)}, /* Telsey 802.11g USB2.0 Adapter */
99 {USB_DEVICE(0x1435, 0x0427)}, /* Inventel UR054G */ 99 {USB_DEVICE(0x1435, 0x0427)}, /* Inventel UR054G */
100 {USB_DEVICE(0x1668, 0x1050)}, /* Actiontec 802UIG-1 */ 100 {USB_DEVICE(0x1668, 0x1050)}, /* Actiontec 802UIG-1 */
101 {USB_DEVICE(0x1740, 0x1000)}, /* Senao NUB-350 */
101 {USB_DEVICE(0x2001, 0x3704)}, /* DLink DWL-G122 rev A2 */ 102 {USB_DEVICE(0x2001, 0x3704)}, /* DLink DWL-G122 rev A2 */
102 {USB_DEVICE(0x2001, 0x3705)}, /* D-Link DWL-G120 rev C1 */ 103 {USB_DEVICE(0x2001, 0x3705)}, /* D-Link DWL-G120 rev C1 */
103 {USB_DEVICE(0x413c, 0x5513)}, /* Dell WLA3310 USB Wireless Adapter */ 104 {USB_DEVICE(0x413c, 0x5513)}, /* Dell WLA3310 USB Wireless Adapter */
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 848cc2cce247..518542b4bf9e 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -2597,6 +2597,9 @@ static int rndis_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
2597 __le32 mode; 2597 __le32 mode;
2598 int ret; 2598 int ret;
2599 2599
2600 if (priv->device_type != RNDIS_BCM4320B)
2601 return -ENOTSUPP;
2602
2600 netdev_dbg(usbdev->net, "%s(): %s, %d\n", __func__, 2603 netdev_dbg(usbdev->net, "%s(): %s, %d\n", __func__,
2601 enabled ? "enabled" : "disabled", 2604 enabled ? "enabled" : "disabled",
2602 timeout); 2605 timeout);
diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c
index aa97971a38af..3b3f1e45ab3e 100644
--- a/drivers/net/wireless/rt2x00/rt2800pci.c
+++ b/drivers/net/wireless/rt2x00/rt2800pci.c
@@ -652,6 +652,12 @@ static void rt2800pci_fill_rxdone(struct queue_entry *entry,
652 */ 652 */
653 rxdesc->flags |= RX_FLAG_IV_STRIPPED; 653 rxdesc->flags |= RX_FLAG_IV_STRIPPED;
654 654
655 /*
656 * The hardware has already checked the Michael Mic and has
657 * stripped it from the frame. Signal this to mac80211.
658 */
659 rxdesc->flags |= RX_FLAG_MMIC_STRIPPED;
660
655 if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS) 661 if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS)
656 rxdesc->flags |= RX_FLAG_DECRYPTED; 662 rxdesc->flags |= RX_FLAG_DECRYPTED;
657 else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC) 663 else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC)
@@ -1065,6 +1071,8 @@ static DEFINE_PCI_DEVICE_TABLE(rt2800pci_device_table) = {
1065 { PCI_DEVICE(0x1814, 0x3390), PCI_DEVICE_DATA(&rt2800pci_ops) }, 1071 { PCI_DEVICE(0x1814, 0x3390), PCI_DEVICE_DATA(&rt2800pci_ops) },
1066#endif 1072#endif
1067#ifdef CONFIG_RT2800PCI_RT35XX 1073#ifdef CONFIG_RT2800PCI_RT35XX
1074 { PCI_DEVICE(0x1432, 0x7711), PCI_DEVICE_DATA(&rt2800pci_ops) },
1075 { PCI_DEVICE(0x1432, 0x7722), PCI_DEVICE_DATA(&rt2800pci_ops) },
1068 { PCI_DEVICE(0x1814, 0x3060), PCI_DEVICE_DATA(&rt2800pci_ops) }, 1076 { PCI_DEVICE(0x1814, 0x3060), PCI_DEVICE_DATA(&rt2800pci_ops) },
1069 { PCI_DEVICE(0x1814, 0x3062), PCI_DEVICE_DATA(&rt2800pci_ops) }, 1077 { PCI_DEVICE(0x1814, 0x3062), PCI_DEVICE_DATA(&rt2800pci_ops) },
1070 { PCI_DEVICE(0x1814, 0x3562), PCI_DEVICE_DATA(&rt2800pci_ops) }, 1078 { PCI_DEVICE(0x1814, 0x3562), PCI_DEVICE_DATA(&rt2800pci_ops) },
diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index b97a4a54ff4c..197a36c05fda 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -486,6 +486,12 @@ static void rt2800usb_fill_rxdone(struct queue_entry *entry,
486 */ 486 */
487 rxdesc->flags |= RX_FLAG_IV_STRIPPED; 487 rxdesc->flags |= RX_FLAG_IV_STRIPPED;
488 488
489 /*
490 * The hardware has already checked the Michael Mic and has
491 * stripped it from the frame. Signal this to mac80211.
492 */
493 rxdesc->flags |= RX_FLAG_MMIC_STRIPPED;
494
489 if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS) 495 if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS)
490 rxdesc->flags |= RX_FLAG_DECRYPTED; 496 rxdesc->flags |= RX_FLAG_DECRYPTED;
491 else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC) 497 else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC)
diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index ffedfd492754..ea1580085347 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4 4
5menuconfig NFC_DEVICES 5menuconfig NFC_DEVICES
6 bool "NFC devices" 6 bool "Near Field Communication (NFC) devices"
7 default n 7 default n
8 ---help--- 8 ---help---
9 You'll have to say Y if your computer contains an NFC device that 9 You'll have to say Y if your computer contains an NFC device that
diff --git a/drivers/nfc/pn544.c b/drivers/nfc/pn544.c
index bae647264dd6..724f65d8f9e4 100644
--- a/drivers/nfc/pn544.c
+++ b/drivers/nfc/pn544.c
@@ -60,7 +60,7 @@ enum pn544_irq {
60struct pn544_info { 60struct pn544_info {
61 struct miscdevice miscdev; 61 struct miscdevice miscdev;
62 struct i2c_client *i2c_dev; 62 struct i2c_client *i2c_dev;
63 struct regulator_bulk_data regs[2]; 63 struct regulator_bulk_data regs[3];
64 64
65 enum pn544_state state; 65 enum pn544_state state;
66 wait_queue_head_t read_wait; 66 wait_queue_head_t read_wait;
@@ -74,6 +74,7 @@ struct pn544_info {
74 74
75static const char reg_vdd_io[] = "Vdd_IO"; 75static const char reg_vdd_io[] = "Vdd_IO";
76static const char reg_vbat[] = "VBat"; 76static const char reg_vbat[] = "VBat";
77static const char reg_vsim[] = "VSim";
77 78
78/* sysfs interface */ 79/* sysfs interface */
79static ssize_t pn544_test(struct device *dev, 80static ssize_t pn544_test(struct device *dev,
@@ -740,6 +741,7 @@ static int __devinit pn544_probe(struct i2c_client *client,
740 741
741 info->regs[0].supply = reg_vdd_io; 742 info->regs[0].supply = reg_vdd_io;
742 info->regs[1].supply = reg_vbat; 743 info->regs[1].supply = reg_vbat;
744 info->regs[2].supply = reg_vsim;
743 r = regulator_bulk_get(&client->dev, ARRAY_SIZE(info->regs), 745 r = regulator_bulk_get(&client->dev, ARRAY_SIZE(info->regs),
744 info->regs); 746 info->regs);
745 if (r < 0) 747 if (r < 0)
diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c
index 28295d0a50f6..4d87b5dc9284 100644
--- a/drivers/of/pdt.c
+++ b/drivers/of/pdt.c
@@ -36,19 +36,55 @@ unsigned int of_pdt_unique_id __initdata;
36 (p)->unique_id = of_pdt_unique_id++; \ 36 (p)->unique_id = of_pdt_unique_id++; \
37} while (0) 37} while (0)
38 38
39static inline const char *of_pdt_node_name(struct device_node *dp) 39static char * __init of_pdt_build_full_name(struct device_node *dp)
40{ 40{
41 return dp->path_component_name; 41 int len, ourlen, plen;
42 char *n;
43
44 dp->path_component_name = build_path_component(dp);
45
46 plen = strlen(dp->parent->full_name);
47 ourlen = strlen(dp->path_component_name);
48 len = ourlen + plen + 2;
49
50 n = prom_early_alloc(len);
51 strcpy(n, dp->parent->full_name);
52 if (!of_node_is_root(dp->parent)) {
53 strcpy(n + plen, "/");
54 plen++;
55 }
56 strcpy(n + plen, dp->path_component_name);
57
58 return n;
42} 59}
43 60
44#else 61#else /* CONFIG_SPARC */
45 62
46static inline void of_pdt_incr_unique_id(void *p) { } 63static inline void of_pdt_incr_unique_id(void *p) { }
47static inline void irq_trans_init(struct device_node *dp) { } 64static inline void irq_trans_init(struct device_node *dp) { }
48 65
49static inline const char *of_pdt_node_name(struct device_node *dp) 66static char * __init of_pdt_build_full_name(struct device_node *dp)
50{ 67{
51 return dp->name; 68 static int failsafe_id = 0; /* for generating unique names on failure */
69 char *buf;
70 int len;
71
72 if (of_pdt_prom_ops->pkg2path(dp->phandle, NULL, 0, &len))
73 goto failsafe;
74
75 buf = prom_early_alloc(len + 1);
76 if (of_pdt_prom_ops->pkg2path(dp->phandle, buf, len, &len))
77 goto failsafe;
78 return buf;
79
80 failsafe:
81 buf = prom_early_alloc(strlen(dp->parent->full_name) +
82 strlen(dp->name) + 16);
83 sprintf(buf, "%s/%s@unknown%i",
84 of_node_is_root(dp->parent) ? "" : dp->parent->full_name,
85 dp->name, failsafe_id++);
86 pr_err("%s: pkg2path failed; assigning %s\n", __func__, buf);
87 return buf;
52} 88}
53 89
54#endif /* !CONFIG_SPARC */ 90#endif /* !CONFIG_SPARC */
@@ -132,47 +168,6 @@ static char * __init of_pdt_get_one_property(phandle node, const char *name)
132 return buf; 168 return buf;
133} 169}
134 170
135static char * __init of_pdt_try_pkg2path(phandle node)
136{
137 char *res, *buf = NULL;
138 int len;
139
140 if (!of_pdt_prom_ops->pkg2path)
141 return NULL;
142
143 if (of_pdt_prom_ops->pkg2path(node, buf, 0, &len))
144 return NULL;
145 buf = prom_early_alloc(len + 1);
146 if (of_pdt_prom_ops->pkg2path(node, buf, len, &len)) {
147 pr_err("%s: package-to-path failed\n", __func__);
148 return NULL;
149 }
150
151 res = strrchr(buf, '/');
152 if (!res) {
153 pr_err("%s: couldn't find / in %s\n", __func__, buf);
154 return NULL;
155 }
156 return res+1;
157}
158
159/*
160 * When fetching the node's name, first try using package-to-path; if
161 * that fails (either because the arch hasn't supplied a PROM callback,
162 * or some other random failure), fall back to just looking at the node's
163 * 'name' property.
164 */
165static char * __init of_pdt_build_name(phandle node)
166{
167 char *buf;
168
169 buf = of_pdt_try_pkg2path(node);
170 if (!buf)
171 buf = of_pdt_get_one_property(node, "name");
172
173 return buf;
174}
175
176static struct device_node * __init of_pdt_create_node(phandle node, 171static struct device_node * __init of_pdt_create_node(phandle node,
177 struct device_node *parent) 172 struct device_node *parent)
178{ 173{
@@ -187,7 +182,7 @@ static struct device_node * __init of_pdt_create_node(phandle node,
187 182
188 kref_init(&dp->kref); 183 kref_init(&dp->kref);
189 184
190 dp->name = of_pdt_build_name(node); 185 dp->name = of_pdt_get_one_property(node, "name");
191 dp->type = of_pdt_get_one_property(node, "device_type"); 186 dp->type = of_pdt_get_one_property(node, "device_type");
192 dp->phandle = node; 187 dp->phandle = node;
193 188
@@ -198,26 +193,6 @@ static struct device_node * __init of_pdt_create_node(phandle node,
198 return dp; 193 return dp;
199} 194}
200 195
201static char * __init of_pdt_build_full_name(struct device_node *dp)
202{
203 int len, ourlen, plen;
204 char *n;
205
206 plen = strlen(dp->parent->full_name);
207 ourlen = strlen(of_pdt_node_name(dp));
208 len = ourlen + plen + 2;
209
210 n = prom_early_alloc(len);
211 strcpy(n, dp->parent->full_name);
212 if (!of_node_is_root(dp->parent)) {
213 strcpy(n + plen, "/");
214 plen++;
215 }
216 strcpy(n + plen, of_pdt_node_name(dp));
217
218 return n;
219}
220
221static struct device_node * __init of_pdt_build_tree(struct device_node *parent, 196static struct device_node * __init of_pdt_build_tree(struct device_node *parent,
222 phandle node, 197 phandle node,
223 struct device_node ***nextp) 198 struct device_node ***nextp)
@@ -240,9 +215,6 @@ static struct device_node * __init of_pdt_build_tree(struct device_node *parent,
240 *(*nextp) = dp; 215 *(*nextp) = dp;
241 *nextp = &dp->allnext; 216 *nextp = &dp->allnext;
242 217
243#if defined(CONFIG_SPARC)
244 dp->path_component_name = build_path_component(dp);
245#endif
246 dp->full_name = of_pdt_build_full_name(dp); 218 dp->full_name = of_pdt_build_full_name(dp);
247 219
248 dp->child = of_pdt_build_tree(dp, 220 dp->child = of_pdt_build_tree(dp,
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 3a5a6fcc0ead..492b7d807fe8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = {
243 243
244#ifdef CONFIG_PCI_MSI 244#ifdef CONFIG_PCI_MSI
245static int pci_frontend_enable_msix(struct pci_dev *dev, 245static int pci_frontend_enable_msix(struct pci_dev *dev,
246 int **vector, int nvec) 246 int vector[], int nvec)
247{ 247{
248 int err; 248 int err;
249 int i; 249 int i;
@@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
277 if (likely(!err)) { 277 if (likely(!err)) {
278 if (likely(!op.value)) { 278 if (likely(!op.value)) {
279 /* we get the result */ 279 /* we get the result */
280 for (i = 0; i < nvec; i++) 280 for (i = 0; i < nvec; i++) {
281 *(*vector+i) = op.msix_entries[i].vector; 281 if (op.msix_entries[i].vector <= 0) {
282 return 0; 282 dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
283 i, op.msix_entries[i].vector);
284 err = -EINVAL;
285 vector[i] = -1;
286 continue;
287 }
288 vector[i] = op.msix_entries[i].vector;
289 }
283 } else { 290 } else {
284 printk(KERN_DEBUG "enable msix get value %x\n", 291 printk(KERN_DEBUG "enable msix get value %x\n",
285 op.value); 292 op.value);
286 return op.value;
287 } 293 }
288 } else { 294 } else {
289 dev_err(&dev->dev, "enable msix get err %x\n", err); 295 dev_err(&dev->dev, "enable msix get err %x\n", err);
290 return err;
291 } 296 }
297 return err;
292} 298}
293 299
294static void pci_frontend_disable_msix(struct pci_dev *dev) 300static void pci_frontend_disable_msix(struct pci_dev *dev)
@@ -310,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev)
310 dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); 316 dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
311} 317}
312 318
313static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) 319static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
314{ 320{
315 int err; 321 int err;
316 struct xen_pci_op op = { 322 struct xen_pci_op op = {
@@ -324,7 +330,13 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
324 330
325 err = do_pci_op(pdev, &op); 331 err = do_pci_op(pdev, &op);
326 if (likely(!err)) { 332 if (likely(!err)) {
327 *(*vector) = op.value; 333 vector[0] = op.value;
334 if (op.value <= 0) {
335 dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
336 op.value);
337 err = -EINVAL;
338 vector[0] = -1;
339 }
328 } else { 340 } else {
329 dev_err(&dev->dev, "pci frontend enable msi failed for dev " 341 dev_err(&dev->dev, "pci frontend enable msi failed for dev "
330 "%x:%x\n", op.bus, op.devfn); 342 "%x:%x\n", op.bus, op.devfn);
@@ -733,8 +745,7 @@ static void free_pdev(struct pcifront_device *pdev)
733 745
734 pcifront_free_roots(pdev); 746 pcifront_free_roots(pdev);
735 747
736 /*For PCIE_AER error handling job*/ 748 cancel_work_sync(&pdev->op_work);
737 flush_scheduled_work();
738 749
739 if (pdev->irq >= 0) 750 if (pdev->irq >= 0)
740 unbind_from_irqhandler(pdev->irq, pdev); 751 unbind_from_irqhandler(pdev->irq, pdev);
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 0bdda5b3ed55..42fbf1a75576 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -518,6 +518,8 @@ int pcmcia_enable_device(struct pcmcia_device *p_dev)
518 flags |= CONF_ENABLE_IOCARD; 518 flags |= CONF_ENABLE_IOCARD;
519 if (flags & CONF_ENABLE_IOCARD) 519 if (flags & CONF_ENABLE_IOCARD)
520 s->socket.flags |= SS_IOCARD; 520 s->socket.flags |= SS_IOCARD;
521 if (flags & CONF_ENABLE_ZVCARD)
522 s->socket.flags |= SS_ZVCARD | SS_IOCARD;
521 if (flags & CONF_ENABLE_SPKR) { 523 if (flags & CONF_ENABLE_SPKR) {
522 s->socket.flags |= SS_SPKR_ENA; 524 s->socket.flags |= SS_SPKR_ENA;
523 status = CCSR_AUDIO_ENA; 525 status = CCSR_AUDIO_ENA;
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index 3755e7c8c715..2c540542b5af 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -215,7 +215,7 @@ pxa2xx_pcmcia_frequency_change(struct soc_pcmcia_socket *skt,
215} 215}
216#endif 216#endif
217 217
218static void pxa2xx_configure_sockets(struct device *dev) 218void pxa2xx_configure_sockets(struct device *dev)
219{ 219{
220 struct pcmcia_low_level *ops = dev->platform_data; 220 struct pcmcia_low_level *ops = dev->platform_data;
221 /* 221 /*
diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h
index bb62ea87b8f9..b609b45469ed 100644
--- a/drivers/pcmcia/pxa2xx_base.h
+++ b/drivers/pcmcia/pxa2xx_base.h
@@ -1,3 +1,4 @@
1int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); 1int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt);
2void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); 2void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops);
3void pxa2xx_configure_sockets(struct device *dev);
3 4
diff --git a/drivers/pcmcia/pxa2xx_colibri.c b/drivers/pcmcia/pxa2xx_colibri.c
index c3f72192af66..a52039564e74 100644
--- a/drivers/pcmcia/pxa2xx_colibri.c
+++ b/drivers/pcmcia/pxa2xx_colibri.c
@@ -181,6 +181,9 @@ static int __init colibri_pcmcia_init(void)
181{ 181{
182 int ret; 182 int ret;
183 183
184 if (!machine_is_colibri() && !machine_is_colibri320())
185 return -ENODEV;
186
184 colibri_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1); 187 colibri_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1);
185 if (!colibri_pcmcia_device) 188 if (!colibri_pcmcia_device)
186 return -ENOMEM; 189 return -ENOMEM;
diff --git a/drivers/pcmcia/pxa2xx_lubbock.c b/drivers/pcmcia/pxa2xx_lubbock.c
index b9f8c8fb42bd..25afe637c657 100644
--- a/drivers/pcmcia/pxa2xx_lubbock.c
+++ b/drivers/pcmcia/pxa2xx_lubbock.c
@@ -226,6 +226,7 @@ int pcmcia_lubbock_init(struct sa1111_dev *sadev)
226 lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); 226 lubbock_set_misc_wr((1 << 15) | (1 << 14), 0);
227 227
228 pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); 228 pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops);
229 pxa2xx_configure_sockets(&sadev->dev);
229 ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, 230 ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops,
230 pxa2xx_drv_pcmcia_add_one); 231 pxa2xx_drv_pcmcia_add_one);
231 } 232 }
diff --git a/drivers/pps/generators/Kconfig b/drivers/pps/generators/Kconfig
index f3a73dd77660..e4c4f3dc0728 100644
--- a/drivers/pps/generators/Kconfig
+++ b/drivers/pps/generators/Kconfig
@@ -6,7 +6,7 @@ comment "PPS generators support"
6 6
7config PPS_GENERATOR_PARPORT 7config PPS_GENERATOR_PARPORT
8 tristate "Parallel port PPS signal generator" 8 tristate "Parallel port PPS signal generator"
9 depends on PARPORT 9 depends on PARPORT && BROKEN
10 help 10 help
11 If you say yes here you get support for a PPS signal generator which 11 If you say yes here you get support for a PPS signal generator which
12 utilizes STROBE pin of a parallel port to send PPS signals. It uses 12 utilizes STROBE pin of a parallel port to send PPS signals. It uses
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
index cba1b43f7519..a4e8eb9fece6 100644
--- a/drivers/pps/kapi.c
+++ b/drivers/pps/kapi.c
@@ -168,7 +168,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
168{ 168{
169 unsigned long flags; 169 unsigned long flags;
170 int captured = 0; 170 int captured = 0;
171 struct pps_ktime ts_real; 171 struct pps_ktime ts_real = { .sec = 0, .nsec = 0, .flags = 0 };
172 172
173 /* check event type */ 173 /* check event type */
174 BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0); 174 BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0);
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index 76b41853a877..1269fbd2deca 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -77,9 +77,9 @@ rio_read_config(struct file *filp, struct kobject *kobj,
77 77
78 /* Several chips lock up trying to read undefined config space */ 78 /* Several chips lock up trying to read undefined config space */
79 if (capable(CAP_SYS_ADMIN)) 79 if (capable(CAP_SYS_ADMIN))
80 size = 0x200000; 80 size = RIO_MAINT_SPACE_SZ;
81 81
82 if (off > size) 82 if (off >= size)
83 return 0; 83 return 0;
84 if (off + count > size) { 84 if (off + count > size) {
85 size -= off; 85 size -= off;
@@ -147,10 +147,10 @@ rio_write_config(struct file *filp, struct kobject *kobj,
147 loff_t init_off = off; 147 loff_t init_off = off;
148 u8 *data = (u8 *) buf; 148 u8 *data = (u8 *) buf;
149 149
150 if (off > 0x200000) 150 if (off >= RIO_MAINT_SPACE_SZ)
151 return 0; 151 return 0;
152 if (off + count > 0x200000) { 152 if (off + count > RIO_MAINT_SPACE_SZ) {
153 size = 0x200000 - off; 153 size = RIO_MAINT_SPACE_SZ - off;
154 count = size; 154 count = size;
155 } 155 }
156 156
@@ -200,7 +200,7 @@ static struct bin_attribute rio_config_attr = {
200 .name = "config", 200 .name = "config",
201 .mode = S_IRUGO | S_IWUSR, 201 .mode = S_IRUGO | S_IWUSR,
202 }, 202 },
203 .size = 0x200000, 203 .size = RIO_MAINT_SPACE_SZ,
204 .read = rio_read_config, 204 .read = rio_read_config,
205 .write = rio_write_config, 205 .write = rio_write_config,
206}; 206};
diff --git a/drivers/regulator/mc13xxx-regulator-core.c b/drivers/regulator/mc13xxx-regulator-core.c
index f53d31b950d4..2bb5de1f2421 100644
--- a/drivers/regulator/mc13xxx-regulator-core.c
+++ b/drivers/regulator/mc13xxx-regulator-core.c
@@ -174,7 +174,7 @@ static int mc13xxx_regulator_get_voltage(struct regulator_dev *rdev)
174 174
175 dev_dbg(rdev_get_dev(rdev), "%s id: %d val: %d\n", __func__, id, val); 175 dev_dbg(rdev_get_dev(rdev), "%s id: %d val: %d\n", __func__, id, val);
176 176
177 BUG_ON(val < 0 || val > mc13xxx_regulators[id].desc.n_voltages); 177 BUG_ON(val > mc13xxx_regulators[id].desc.n_voltages);
178 178
179 return mc13xxx_regulators[id].voltages[val]; 179 return mc13xxx_regulators[id].voltages[val];
180} 180}
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index 8b0d2c4bde91..06df898842c0 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -120,6 +120,7 @@ static unsigned int wm831x_dcdc_get_mode(struct regulator_dev *rdev)
120 return REGULATOR_MODE_IDLE; 120 return REGULATOR_MODE_IDLE;
121 default: 121 default:
122 BUG(); 122 BUG();
123 return -EINVAL;
123 } 124 }
124} 125}
125 126
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index c404b61386bf..09b4437b3e61 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -117,6 +117,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
117 struct module *owner) 117 struct module *owner)
118{ 118{
119 struct rtc_device *rtc; 119 struct rtc_device *rtc;
120 struct rtc_wkalrm alrm;
120 int id, err; 121 int id, err;
121 122
122 if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) { 123 if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) {
@@ -166,6 +167,12 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
166 rtc->pie_timer.function = rtc_pie_update_irq; 167 rtc->pie_timer.function = rtc_pie_update_irq;
167 rtc->pie_enabled = 0; 168 rtc->pie_enabled = 0;
168 169
170 /* Check to see if there is an ALARM already set in hw */
171 err = __rtc_read_alarm(rtc, &alrm);
172
173 if (!err && !rtc_valid_tm(&alrm.time))
174 rtc_set_alarm(rtc, &alrm);
175
169 strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE); 176 strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
170 dev_set_name(&rtc->dev, "rtc%d", id); 177 dev_set_name(&rtc->dev, "rtc%d", id);
171 178
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index cb2f0728fd70..8ec6b069a7f5 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -116,6 +116,186 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
116} 116}
117EXPORT_SYMBOL_GPL(rtc_set_mmss); 117EXPORT_SYMBOL_GPL(rtc_set_mmss);
118 118
119static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
120{
121 int err;
122
123 err = mutex_lock_interruptible(&rtc->ops_lock);
124 if (err)
125 return err;
126
127 if (rtc->ops == NULL)
128 err = -ENODEV;
129 else if (!rtc->ops->read_alarm)
130 err = -EINVAL;
131 else {
132 memset(alarm, 0, sizeof(struct rtc_wkalrm));
133 err = rtc->ops->read_alarm(rtc->dev.parent, alarm);
134 }
135
136 mutex_unlock(&rtc->ops_lock);
137 return err;
138}
139
140int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
141{
142 int err;
143 struct rtc_time before, now;
144 int first_time = 1;
145 unsigned long t_now, t_alm;
146 enum { none, day, month, year } missing = none;
147 unsigned days;
148
149 /* The lower level RTC driver may return -1 in some fields,
150 * creating invalid alarm->time values, for reasons like:
151 *
152 * - The hardware may not be capable of filling them in;
153 * many alarms match only on time-of-day fields, not
154 * day/month/year calendar data.
155 *
156 * - Some hardware uses illegal values as "wildcard" match
157 * values, which non-Linux firmware (like a BIOS) may try
158 * to set up as e.g. "alarm 15 minutes after each hour".
159 * Linux uses only oneshot alarms.
160 *
161 * When we see that here, we deal with it by using values from
162 * a current RTC timestamp for any missing (-1) values. The
163 * RTC driver prevents "periodic alarm" modes.
164 *
165 * But this can be racey, because some fields of the RTC timestamp
166 * may have wrapped in the interval since we read the RTC alarm,
167 * which would lead to us inserting inconsistent values in place
168 * of the -1 fields.
169 *
170 * Reading the alarm and timestamp in the reverse sequence
171 * would have the same race condition, and not solve the issue.
172 *
173 * So, we must first read the RTC timestamp,
174 * then read the RTC alarm value,
175 * and then read a second RTC timestamp.
176 *
177 * If any fields of the second timestamp have changed
178 * when compared with the first timestamp, then we know
179 * our timestamp may be inconsistent with that used by
180 * the low-level rtc_read_alarm_internal() function.
181 *
182 * So, when the two timestamps disagree, we just loop and do
183 * the process again to get a fully consistent set of values.
184 *
185 * This could all instead be done in the lower level driver,
186 * but since more than one lower level RTC implementation needs it,
187 * then it's probably best best to do it here instead of there..
188 */
189
190 /* Get the "before" timestamp */
191 err = rtc_read_time(rtc, &before);
192 if (err < 0)
193 return err;
194 do {
195 if (!first_time)
196 memcpy(&before, &now, sizeof(struct rtc_time));
197 first_time = 0;
198
199 /* get the RTC alarm values, which may be incomplete */
200 err = rtc_read_alarm_internal(rtc, alarm);
201 if (err)
202 return err;
203
204 /* full-function RTCs won't have such missing fields */
205 if (rtc_valid_tm(&alarm->time) == 0)
206 return 0;
207
208 /* get the "after" timestamp, to detect wrapped fields */
209 err = rtc_read_time(rtc, &now);
210 if (err < 0)
211 return err;
212
213 /* note that tm_sec is a "don't care" value here: */
214 } while ( before.tm_min != now.tm_min
215 || before.tm_hour != now.tm_hour
216 || before.tm_mon != now.tm_mon
217 || before.tm_year != now.tm_year);
218
219 /* Fill in the missing alarm fields using the timestamp; we
220 * know there's at least one since alarm->time is invalid.
221 */
222 if (alarm->time.tm_sec == -1)
223 alarm->time.tm_sec = now.tm_sec;
224 if (alarm->time.tm_min == -1)
225 alarm->time.tm_min = now.tm_min;
226 if (alarm->time.tm_hour == -1)
227 alarm->time.tm_hour = now.tm_hour;
228
229 /* For simplicity, only support date rollover for now */
230 if (alarm->time.tm_mday == -1) {
231 alarm->time.tm_mday = now.tm_mday;
232 missing = day;
233 }
234 if (alarm->time.tm_mon == -1) {
235 alarm->time.tm_mon = now.tm_mon;
236 if (missing == none)
237 missing = month;
238 }
239 if (alarm->time.tm_year == -1) {
240 alarm->time.tm_year = now.tm_year;
241 if (missing == none)
242 missing = year;
243 }
244
245 /* with luck, no rollover is needed */
246 rtc_tm_to_time(&now, &t_now);
247 rtc_tm_to_time(&alarm->time, &t_alm);
248 if (t_now < t_alm)
249 goto done;
250
251 switch (missing) {
252
253 /* 24 hour rollover ... if it's now 10am Monday, an alarm that
254 * that will trigger at 5am will do so at 5am Tuesday, which
255 * could also be in the next month or year. This is a common
256 * case, especially for PCs.
257 */
258 case day:
259 dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day");
260 t_alm += 24 * 60 * 60;
261 rtc_time_to_tm(t_alm, &alarm->time);
262 break;
263
264 /* Month rollover ... if it's the 31th, an alarm on the 3rd will
265 * be next month. An alarm matching on the 30th, 29th, or 28th
266 * may end up in the month after that! Many newer PCs support
267 * this type of alarm.
268 */
269 case month:
270 dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month");
271 do {
272 if (alarm->time.tm_mon < 11)
273 alarm->time.tm_mon++;
274 else {
275 alarm->time.tm_mon = 0;
276 alarm->time.tm_year++;
277 }
278 days = rtc_month_days(alarm->time.tm_mon,
279 alarm->time.tm_year);
280 } while (days < alarm->time.tm_mday);
281 break;
282
283 /* Year rollover ... easy except for leap years! */
284 case year:
285 dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year");
286 do {
287 alarm->time.tm_year++;
288 } while (rtc_valid_tm(&alarm->time) != 0);
289 break;
290
291 default:
292 dev_warn(&rtc->dev, "alarm rollover not handled\n");
293 }
294
295done:
296 return 0;
297}
298
119int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) 299int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
120{ 300{
121 int err; 301 int err;
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 26d1cf5d19ae..518a76ec71ca 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -183,33 +183,6 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
183 return 0; 183 return 0;
184} 184}
185 185
186/*
187 * Handle commands from user-space
188 */
189static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
190 unsigned long arg)
191{
192 int ret = 0;
193
194 pr_debug("%s(): cmd=%08x, arg=%08lx.\n", __func__, cmd, arg);
195
196 /* important: scrub old status before enabling IRQs */
197 switch (cmd) {
198 case RTC_UIE_OFF: /* update off */
199 at91_sys_write(AT91_RTC_IDR, AT91_RTC_SECEV);
200 break;
201 case RTC_UIE_ON: /* update on */
202 at91_sys_write(AT91_RTC_SCCR, AT91_RTC_SECEV);
203 at91_sys_write(AT91_RTC_IER, AT91_RTC_SECEV);
204 break;
205 default:
206 ret = -ENOIOCTLCMD;
207 break;
208 }
209
210 return ret;
211}
212
213static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 186static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
214{ 187{
215 pr_debug("%s(): cmd=%08x\n", __func__, enabled); 188 pr_debug("%s(): cmd=%08x\n", __func__, enabled);
@@ -269,7 +242,6 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id)
269} 242}
270 243
271static const struct rtc_class_ops at91_rtc_ops = { 244static const struct rtc_class_ops at91_rtc_ops = {
272 .ioctl = at91_rtc_ioctl,
273 .read_time = at91_rtc_readtime, 245 .read_time = at91_rtc_readtime,
274 .set_time = at91_rtc_settime, 246 .set_time = at91_rtc_settime,
275 .read_alarm = at91_rtc_readalarm, 247 .read_alarm = at91_rtc_readalarm,
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index c36749e4c926..a3ad957507dc 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -216,33 +216,6 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
216 return 0; 216 return 0;
217} 217}
218 218
219/*
220 * Handle commands from user-space
221 */
222static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
223 unsigned long arg)
224{
225 struct sam9_rtc *rtc = dev_get_drvdata(dev);
226 int ret = 0;
227 u32 mr = rtt_readl(rtc, MR);
228
229 dev_dbg(dev, "ioctl: cmd=%08x, arg=%08lx, mr %08x\n", cmd, arg, mr);
230
231 switch (cmd) {
232 case RTC_UIE_OFF: /* update off */
233 rtt_writel(rtc, MR, mr & ~AT91_RTT_RTTINCIEN);
234 break;
235 case RTC_UIE_ON: /* update on */
236 rtt_writel(rtc, MR, mr | AT91_RTT_RTTINCIEN);
237 break;
238 default:
239 ret = -ENOIOCTLCMD;
240 break;
241 }
242
243 return ret;
244}
245
246static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 219static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
247{ 220{
248 struct sam9_rtc *rtc = dev_get_drvdata(dev); 221 struct sam9_rtc *rtc = dev_get_drvdata(dev);
@@ -303,13 +276,12 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *_rtc)
303} 276}
304 277
305static const struct rtc_class_ops at91_rtc_ops = { 278static const struct rtc_class_ops at91_rtc_ops = {
306 .ioctl = at91_rtc_ioctl,
307 .read_time = at91_rtc_readtime, 279 .read_time = at91_rtc_readtime,
308 .set_time = at91_rtc_settime, 280 .set_time = at91_rtc_settime,
309 .read_alarm = at91_rtc_readalarm, 281 .read_alarm = at91_rtc_readalarm,
310 .set_alarm = at91_rtc_setalarm, 282 .set_alarm = at91_rtc_setalarm,
311 .proc = at91_rtc_proc, 283 .proc = at91_rtc_proc,
312 .alarm_irq_enabled = at91_rtc_alarm_irq_enable, 284 .alarm_irq_enable = at91_rtc_alarm_irq_enable,
313}; 285};
314 286
315/* 287/*
diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index 17971d93354d..ca9cff85ab8a 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -240,32 +240,6 @@ static void bfin_rtc_int_set_alarm(struct bfin_rtc *rtc)
240 */ 240 */
241 bfin_rtc_int_set(rtc->rtc_alarm.tm_yday == -1 ? RTC_ISTAT_ALARM : RTC_ISTAT_ALARM_DAY); 241 bfin_rtc_int_set(rtc->rtc_alarm.tm_yday == -1 ? RTC_ISTAT_ALARM : RTC_ISTAT_ALARM_DAY);
242} 242}
243static int bfin_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
244{
245 struct bfin_rtc *rtc = dev_get_drvdata(dev);
246 int ret = 0;
247
248 dev_dbg_stamp(dev);
249
250 bfin_rtc_sync_pending(dev);
251
252 switch (cmd) {
253 case RTC_UIE_ON:
254 dev_dbg_stamp(dev);
255 bfin_rtc_int_set(RTC_ISTAT_SEC);
256 break;
257 case RTC_UIE_OFF:
258 dev_dbg_stamp(dev);
259 bfin_rtc_int_clear(~RTC_ISTAT_SEC);
260 break;
261
262 default:
263 dev_dbg_stamp(dev);
264 ret = -ENOIOCTLCMD;
265 }
266
267 return ret;
268}
269 243
270static int bfin_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 244static int bfin_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
271{ 245{
@@ -358,7 +332,6 @@ static int bfin_rtc_proc(struct device *dev, struct seq_file *seq)
358} 332}
359 333
360static struct rtc_class_ops bfin_rtc_ops = { 334static struct rtc_class_ops bfin_rtc_ops = {
361 .ioctl = bfin_rtc_ioctl,
362 .read_time = bfin_rtc_read_time, 335 .read_time = bfin_rtc_read_time,
363 .set_time = bfin_rtc_set_time, 336 .set_time = bfin_rtc_set_time,
364 .read_alarm = bfin_rtc_read_alarm, 337 .read_alarm = bfin_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 159b95e4b420..911e75cdc125 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -377,50 +377,6 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
377 return 0; 377 return 0;
378} 378}
379 379
380static int cmos_irq_set_freq(struct device *dev, int freq)
381{
382 struct cmos_rtc *cmos = dev_get_drvdata(dev);
383 int f;
384 unsigned long flags;
385
386 if (!is_valid_irq(cmos->irq))
387 return -ENXIO;
388
389 if (!is_power_of_2(freq))
390 return -EINVAL;
391 /* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */
392 f = ffs(freq);
393 if (f-- > 16)
394 return -EINVAL;
395 f = 16 - f;
396
397 spin_lock_irqsave(&rtc_lock, flags);
398 hpet_set_periodic_freq(freq);
399 CMOS_WRITE(RTC_REF_CLCK_32KHZ | f, RTC_FREQ_SELECT);
400 spin_unlock_irqrestore(&rtc_lock, flags);
401
402 return 0;
403}
404
405static int cmos_irq_set_state(struct device *dev, int enabled)
406{
407 struct cmos_rtc *cmos = dev_get_drvdata(dev);
408 unsigned long flags;
409
410 if (!is_valid_irq(cmos->irq))
411 return -ENXIO;
412
413 spin_lock_irqsave(&rtc_lock, flags);
414
415 if (enabled)
416 cmos_irq_enable(cmos, RTC_PIE);
417 else
418 cmos_irq_disable(cmos, RTC_PIE);
419
420 spin_unlock_irqrestore(&rtc_lock, flags);
421 return 0;
422}
423
424static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled) 380static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
425{ 381{
426 struct cmos_rtc *cmos = dev_get_drvdata(dev); 382 struct cmos_rtc *cmos = dev_get_drvdata(dev);
@@ -440,25 +396,6 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
440 return 0; 396 return 0;
441} 397}
442 398
443static int cmos_update_irq_enable(struct device *dev, unsigned int enabled)
444{
445 struct cmos_rtc *cmos = dev_get_drvdata(dev);
446 unsigned long flags;
447
448 if (!is_valid_irq(cmos->irq))
449 return -EINVAL;
450
451 spin_lock_irqsave(&rtc_lock, flags);
452
453 if (enabled)
454 cmos_irq_enable(cmos, RTC_UIE);
455 else
456 cmos_irq_disable(cmos, RTC_UIE);
457
458 spin_unlock_irqrestore(&rtc_lock, flags);
459 return 0;
460}
461
462#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE) 399#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
463 400
464static int cmos_procfs(struct device *dev, struct seq_file *seq) 401static int cmos_procfs(struct device *dev, struct seq_file *seq)
@@ -503,10 +440,7 @@ static const struct rtc_class_ops cmos_rtc_ops = {
503 .read_alarm = cmos_read_alarm, 440 .read_alarm = cmos_read_alarm,
504 .set_alarm = cmos_set_alarm, 441 .set_alarm = cmos_set_alarm,
505 .proc = cmos_procfs, 442 .proc = cmos_procfs,
506 .irq_set_freq = cmos_irq_set_freq,
507 .irq_set_state = cmos_irq_set_state,
508 .alarm_irq_enable = cmos_alarm_irq_enable, 443 .alarm_irq_enable = cmos_alarm_irq_enable,
509 .update_irq_enable = cmos_update_irq_enable,
510}; 444};
511 445
512/*----------------------------------------------------------------*/ 446/*----------------------------------------------------------------*/
diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c
index 34647fc1ee98..8d46838dff8a 100644
--- a/drivers/rtc/rtc-davinci.c
+++ b/drivers/rtc/rtc-davinci.c
@@ -231,10 +231,6 @@ davinci_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
231 case RTC_WIE_OFF: 231 case RTC_WIE_OFF:
232 rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN; 232 rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN;
233 break; 233 break;
234 case RTC_UIE_OFF:
235 case RTC_UIE_ON:
236 ret = -ENOTTY;
237 break;
238 default: 234 default:
239 ret = -ENOIOCTLCMD; 235 ret = -ENOIOCTLCMD;
240 } 236 }
@@ -473,55 +469,6 @@ static int davinci_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
473 return 0; 469 return 0;
474} 470}
475 471
476static int davinci_rtc_irq_set_state(struct device *dev, int enabled)
477{
478 struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
479 unsigned long flags;
480 u8 rtc_ctrl;
481
482 spin_lock_irqsave(&davinci_rtc_lock, flags);
483
484 rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL);
485
486 if (enabled) {
487 while (rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL)
488 & PRTCSS_RTC_CTRL_WDTBUS)
489 cpu_relax();
490
491 rtc_ctrl |= PRTCSS_RTC_CTRL_TE;
492 rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
493
494 rtcss_write(davinci_rtc, 0x0, PRTCSS_RTC_CLKC_CNT);
495
496 rtc_ctrl |= PRTCSS_RTC_CTRL_TIEN |
497 PRTCSS_RTC_CTRL_TMMD |
498 PRTCSS_RTC_CTRL_TMRFLG;
499 } else
500 rtc_ctrl &= ~PRTCSS_RTC_CTRL_TIEN;
501
502 rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
503
504 spin_unlock_irqrestore(&davinci_rtc_lock, flags);
505
506 return 0;
507}
508
509static int davinci_rtc_irq_set_freq(struct device *dev, int freq)
510{
511 struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
512 unsigned long flags;
513 u16 tmr_counter = (0x8000 >> (ffs(freq) - 1));
514
515 spin_lock_irqsave(&davinci_rtc_lock, flags);
516
517 rtcss_write(davinci_rtc, tmr_counter & 0xFF, PRTCSS_RTC_TMR0);
518 rtcss_write(davinci_rtc, (tmr_counter & 0xFF00) >> 8, PRTCSS_RTC_TMR1);
519
520 spin_unlock_irqrestore(&davinci_rtc_lock, flags);
521
522 return 0;
523}
524
525static struct rtc_class_ops davinci_rtc_ops = { 472static struct rtc_class_ops davinci_rtc_ops = {
526 .ioctl = davinci_rtc_ioctl, 473 .ioctl = davinci_rtc_ioctl,
527 .read_time = davinci_rtc_read_time, 474 .read_time = davinci_rtc_read_time,
@@ -529,8 +476,6 @@ static struct rtc_class_ops davinci_rtc_ops = {
529 .alarm_irq_enable = davinci_rtc_alarm_irq_enable, 476 .alarm_irq_enable = davinci_rtc_alarm_irq_enable,
530 .read_alarm = davinci_rtc_read_alarm, 477 .read_alarm = davinci_rtc_read_alarm,
531 .set_alarm = davinci_rtc_set_alarm, 478 .set_alarm = davinci_rtc_set_alarm,
532 .irq_set_state = davinci_rtc_irq_set_state,
533 .irq_set_freq = davinci_rtc_irq_set_freq,
534}; 479};
535 480
536static int __init davinci_rtc_probe(struct platform_device *pdev) 481static int __init davinci_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 37268e97de49..3fffd708711f 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -397,29 +397,12 @@ static int ds1511_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
397 return 0; 397 return 0;
398} 398}
399 399
400static int ds1511_rtc_update_irq_enable(struct device *dev,
401 unsigned int enabled)
402{
403 struct platform_device *pdev = to_platform_device(dev);
404 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
405
406 if (pdata->irq <= 0)
407 return -EINVAL;
408 if (enabled)
409 pdata->irqen |= RTC_UF;
410 else
411 pdata->irqen &= ~RTC_UF;
412 ds1511_rtc_update_alarm(pdata);
413 return 0;
414}
415
416static const struct rtc_class_ops ds1511_rtc_ops = { 400static const struct rtc_class_ops ds1511_rtc_ops = {
417 .read_time = ds1511_rtc_read_time, 401 .read_time = ds1511_rtc_read_time,
418 .set_time = ds1511_rtc_set_time, 402 .set_time = ds1511_rtc_set_time,
419 .read_alarm = ds1511_rtc_read_alarm, 403 .read_alarm = ds1511_rtc_read_alarm,
420 .set_alarm = ds1511_rtc_set_alarm, 404 .set_alarm = ds1511_rtc_set_alarm,
421 .alarm_irq_enable = ds1511_rtc_alarm_irq_enable, 405 .alarm_irq_enable = ds1511_rtc_alarm_irq_enable,
422 .update_irq_enable = ds1511_rtc_update_irq_enable,
423}; 406};
424 407
425 static ssize_t 408 static ssize_t
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index ff432e2ca275..fee41b97c9e8 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -227,29 +227,12 @@ static int ds1553_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
227 return 0; 227 return 0;
228} 228}
229 229
230static int ds1553_rtc_update_irq_enable(struct device *dev,
231 unsigned int enabled)
232{
233 struct platform_device *pdev = to_platform_device(dev);
234 struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
235
236 if (pdata->irq <= 0)
237 return -EINVAL;
238 if (enabled)
239 pdata->irqen |= RTC_UF;
240 else
241 pdata->irqen &= ~RTC_UF;
242 ds1553_rtc_update_alarm(pdata);
243 return 0;
244}
245
246static const struct rtc_class_ops ds1553_rtc_ops = { 230static const struct rtc_class_ops ds1553_rtc_ops = {
247 .read_time = ds1553_rtc_read_time, 231 .read_time = ds1553_rtc_read_time,
248 .set_time = ds1553_rtc_set_time, 232 .set_time = ds1553_rtc_set_time,
249 .read_alarm = ds1553_rtc_read_alarm, 233 .read_alarm = ds1553_rtc_read_alarm,
250 .set_alarm = ds1553_rtc_set_alarm, 234 .set_alarm = ds1553_rtc_set_alarm,
251 .alarm_irq_enable = ds1553_rtc_alarm_irq_enable, 235 .alarm_irq_enable = ds1553_rtc_alarm_irq_enable,
252 .update_irq_enable = ds1553_rtc_update_irq_enable,
253}; 236};
254 237
255static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj, 238static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index 23a9ee19764c..27b7bf672ac6 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * RTC client/driver for the Maxim/Dallas DS3232 Real-Time Clock over I2C 2 * RTC client/driver for the Maxim/Dallas DS3232 Real-Time Clock over I2C
3 * 3 *
4 * Copyright (C) 2009-2010 Freescale Semiconductor. 4 * Copyright (C) 2009-2011 Freescale Semiconductor.
5 * Author: Jack Lan <jack.lan@freescale.com> 5 * Author: Jack Lan <jack.lan@freescale.com>
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify it 7 * This program is free software; you can redistribute it and/or modify it
@@ -141,9 +141,11 @@ static int ds3232_read_time(struct device *dev, struct rtc_time *time)
141 time->tm_hour = bcd2bin(hour); 141 time->tm_hour = bcd2bin(hour);
142 } 142 }
143 143
144 time->tm_wday = bcd2bin(week); 144 /* Day of the week in linux range is 0~6 while 1~7 in RTC chip */
145 time->tm_wday = bcd2bin(week) - 1;
145 time->tm_mday = bcd2bin(day); 146 time->tm_mday = bcd2bin(day);
146 time->tm_mon = bcd2bin(month & 0x7F); 147 /* linux tm_mon range:0~11, while month range is 1~12 in RTC chip */
148 time->tm_mon = bcd2bin(month & 0x7F) - 1;
147 if (century) 149 if (century)
148 add_century = 100; 150 add_century = 100;
149 151
@@ -162,9 +164,11 @@ static int ds3232_set_time(struct device *dev, struct rtc_time *time)
162 buf[0] = bin2bcd(time->tm_sec); 164 buf[0] = bin2bcd(time->tm_sec);
163 buf[1] = bin2bcd(time->tm_min); 165 buf[1] = bin2bcd(time->tm_min);
164 buf[2] = bin2bcd(time->tm_hour); 166 buf[2] = bin2bcd(time->tm_hour);
165 buf[3] = bin2bcd(time->tm_wday); /* Day of the week */ 167 /* Day of the week in linux range is 0~6 while 1~7 in RTC chip */
168 buf[3] = bin2bcd(time->tm_wday + 1);
166 buf[4] = bin2bcd(time->tm_mday); /* Date */ 169 buf[4] = bin2bcd(time->tm_mday); /* Date */
167 buf[5] = bin2bcd(time->tm_mon); 170 /* linux tm_mon range:0~11, while month range is 1~12 in RTC chip */
171 buf[5] = bin2bcd(time->tm_mon + 1);
168 if (time->tm_year >= 100) { 172 if (time->tm_year >= 100) {
169 buf[5] |= 0x80; 173 buf[5] |= 0x80;
170 buf[6] = bin2bcd(time->tm_year - 100); 174 buf[6] = bin2bcd(time->tm_year - 100);
@@ -335,23 +339,6 @@ static int ds3232_alarm_irq_enable(struct device *dev, unsigned int enabled)
335 return 0; 339 return 0;
336} 340}
337 341
338static int ds3232_update_irq_enable(struct device *dev, unsigned int enabled)
339{
340 struct i2c_client *client = to_i2c_client(dev);
341 struct ds3232 *ds3232 = i2c_get_clientdata(client);
342
343 if (client->irq <= 0)
344 return -EINVAL;
345
346 if (enabled)
347 ds3232->rtc->irq_data |= RTC_UF;
348 else
349 ds3232->rtc->irq_data &= ~RTC_UF;
350
351 ds3232_update_alarm(client);
352 return 0;
353}
354
355static irqreturn_t ds3232_irq(int irq, void *dev_id) 342static irqreturn_t ds3232_irq(int irq, void *dev_id)
356{ 343{
357 struct i2c_client *client = dev_id; 344 struct i2c_client *client = dev_id;
@@ -402,7 +389,6 @@ static const struct rtc_class_ops ds3232_rtc_ops = {
402 .read_alarm = ds3232_read_alarm, 389 .read_alarm = ds3232_read_alarm,
403 .set_alarm = ds3232_set_alarm, 390 .set_alarm = ds3232_set_alarm,
404 .alarm_irq_enable = ds3232_alarm_irq_enable, 391 .alarm_irq_enable = ds3232_alarm_irq_enable,
405 .update_irq_enable = ds3232_update_irq_enable,
406}; 392};
407 393
408static int __devinit ds3232_probe(struct i2c_client *client, 394static int __devinit ds3232_probe(struct i2c_client *client,
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index 2e16f72c9056..b6473631d182 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -168,12 +168,6 @@ static int jz4740_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
168 return ret; 168 return ret;
169} 169}
170 170
171static int jz4740_rtc_update_irq_enable(struct device *dev, unsigned int enable)
172{
173 struct jz4740_rtc *rtc = dev_get_drvdata(dev);
174 return jz4740_rtc_ctrl_set_bits(rtc, JZ_RTC_CTRL_1HZ_IRQ, enable);
175}
176
177static int jz4740_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) 171static int jz4740_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
178{ 172{
179 struct jz4740_rtc *rtc = dev_get_drvdata(dev); 173 struct jz4740_rtc *rtc = dev_get_drvdata(dev);
@@ -185,7 +179,6 @@ static struct rtc_class_ops jz4740_rtc_ops = {
185 .set_mmss = jz4740_rtc_set_mmss, 179 .set_mmss = jz4740_rtc_set_mmss,
186 .read_alarm = jz4740_rtc_read_alarm, 180 .read_alarm = jz4740_rtc_read_alarm,
187 .set_alarm = jz4740_rtc_set_alarm, 181 .set_alarm = jz4740_rtc_set_alarm,
188 .update_irq_enable = jz4740_rtc_update_irq_enable,
189 .alarm_irq_enable = jz4740_rtc_alarm_irq_enable, 182 .alarm_irq_enable = jz4740_rtc_alarm_irq_enable,
190}; 183};
191 184
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 5314b153bfba..c42006469559 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -282,12 +282,6 @@ static irqreturn_t mc13xxx_rtc_update_handler(int irq, void *dev)
282 return IRQ_HANDLED; 282 return IRQ_HANDLED;
283} 283}
284 284
285static int mc13xxx_rtc_update_irq_enable(struct device *dev,
286 unsigned int enabled)
287{
288 return mc13xxx_rtc_irq_enable(dev, enabled, MC13XXX_IRQ_1HZ);
289}
290
291static int mc13xxx_rtc_alarm_irq_enable(struct device *dev, 285static int mc13xxx_rtc_alarm_irq_enable(struct device *dev,
292 unsigned int enabled) 286 unsigned int enabled)
293{ 287{
@@ -300,7 +294,6 @@ static const struct rtc_class_ops mc13xxx_rtc_ops = {
300 .read_alarm = mc13xxx_rtc_read_alarm, 294 .read_alarm = mc13xxx_rtc_read_alarm,
301 .set_alarm = mc13xxx_rtc_set_alarm, 295 .set_alarm = mc13xxx_rtc_set_alarm,
302 .alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable, 296 .alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable,
303 .update_irq_enable = mc13xxx_rtc_update_irq_enable,
304}; 297};
305 298
306static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev) 299static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev)
diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c
index dfcdf0901d21..b40c1ff1ebc8 100644
--- a/drivers/rtc/rtc-mpc5121.c
+++ b/drivers/rtc/rtc-mpc5121.c
@@ -240,32 +240,12 @@ static int mpc5121_rtc_alarm_irq_enable(struct device *dev,
240 return 0; 240 return 0;
241} 241}
242 242
243static int mpc5121_rtc_update_irq_enable(struct device *dev,
244 unsigned int enabled)
245{
246 struct mpc5121_rtc_data *rtc = dev_get_drvdata(dev);
247 struct mpc5121_rtc_regs __iomem *regs = rtc->regs;
248 int val;
249
250 val = in_8(&regs->int_enable);
251
252 if (enabled)
253 val = (val & ~0x8) | 0x1;
254 else
255 val &= ~0x1;
256
257 out_8(&regs->int_enable, val);
258
259 return 0;
260}
261
262static const struct rtc_class_ops mpc5121_rtc_ops = { 243static const struct rtc_class_ops mpc5121_rtc_ops = {
263 .read_time = mpc5121_rtc_read_time, 244 .read_time = mpc5121_rtc_read_time,
264 .set_time = mpc5121_rtc_set_time, 245 .set_time = mpc5121_rtc_set_time,
265 .read_alarm = mpc5121_rtc_read_alarm, 246 .read_alarm = mpc5121_rtc_read_alarm,
266 .set_alarm = mpc5121_rtc_set_alarm, 247 .set_alarm = mpc5121_rtc_set_alarm,
267 .alarm_irq_enable = mpc5121_rtc_alarm_irq_enable, 248 .alarm_irq_enable = mpc5121_rtc_alarm_irq_enable,
268 .update_irq_enable = mpc5121_rtc_update_irq_enable,
269}; 249};
270 250
271static int __devinit mpc5121_rtc_probe(struct platform_device *op, 251static int __devinit mpc5121_rtc_probe(struct platform_device *op,
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 8d2cf6027120..b86bc328463b 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -247,25 +247,6 @@ static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t)
247 return 0; 247 return 0;
248} 248}
249 249
250static int mrst_irq_set_state(struct device *dev, int enabled)
251{
252 struct mrst_rtc *mrst = dev_get_drvdata(dev);
253 unsigned long flags;
254
255 if (!mrst->irq)
256 return -ENXIO;
257
258 spin_lock_irqsave(&rtc_lock, flags);
259
260 if (enabled)
261 mrst_irq_enable(mrst, RTC_PIE);
262 else
263 mrst_irq_disable(mrst, RTC_PIE);
264
265 spin_unlock_irqrestore(&rtc_lock, flags);
266 return 0;
267}
268
269/* Currently, the vRTC doesn't support UIE ON/OFF */ 250/* Currently, the vRTC doesn't support UIE ON/OFF */
270static int mrst_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 251static int mrst_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
271{ 252{
@@ -312,7 +293,6 @@ static const struct rtc_class_ops mrst_rtc_ops = {
312 .read_alarm = mrst_read_alarm, 293 .read_alarm = mrst_read_alarm,
313 .set_alarm = mrst_set_alarm, 294 .set_alarm = mrst_set_alarm,
314 .proc = mrst_procfs, 295 .proc = mrst_procfs,
315 .irq_set_state = mrst_irq_set_state,
316 .alarm_irq_enable = mrst_rtc_alarm_irq_enable, 296 .alarm_irq_enable = mrst_rtc_alarm_irq_enable,
317}; 297};
318 298
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 0b06c1e03fd5..826ab64a8fa9 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -274,12 +274,6 @@ static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
274 return 0; 274 return 0;
275} 275}
276 276
277static int mxc_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
278{
279 mxc_rtc_irq_enable(dev, RTC_1HZ_BIT, enabled);
280 return 0;
281}
282
283/* 277/*
284 * This function reads the current RTC time into tm in Gregorian date. 278 * This function reads the current RTC time into tm in Gregorian date.
285 */ 279 */
@@ -368,7 +362,6 @@ static struct rtc_class_ops mxc_rtc_ops = {
368 .read_alarm = mxc_rtc_read_alarm, 362 .read_alarm = mxc_rtc_read_alarm,
369 .set_alarm = mxc_rtc_set_alarm, 363 .set_alarm = mxc_rtc_set_alarm,
370 .alarm_irq_enable = mxc_rtc_alarm_irq_enable, 364 .alarm_irq_enable = mxc_rtc_alarm_irq_enable,
371 .update_irq_enable = mxc_rtc_update_irq_enable,
372}; 365};
373 366
374static int __init mxc_rtc_probe(struct platform_device *pdev) 367static int __init mxc_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index ddb0857e15a4..781068d62f23 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c
@@ -134,20 +134,6 @@ static void nuc900_rtc_bin2bcd(struct device *dev, struct rtc_time *settm,
134 gettm->bcd_hour = bin2bcd(settm->tm_hour) << 16; 134 gettm->bcd_hour = bin2bcd(settm->tm_hour) << 16;
135} 135}
136 136
137static int nuc900_update_irq_enable(struct device *dev, unsigned int enabled)
138{
139 struct nuc900_rtc *rtc = dev_get_drvdata(dev);
140
141 if (enabled)
142 __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)|
143 (TICKINTENB), rtc->rtc_reg + REG_RTC_RIER);
144 else
145 __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)&
146 (~TICKINTENB), rtc->rtc_reg + REG_RTC_RIER);
147
148 return 0;
149}
150
151static int nuc900_alarm_irq_enable(struct device *dev, unsigned int enabled) 137static int nuc900_alarm_irq_enable(struct device *dev, unsigned int enabled)
152{ 138{
153 struct nuc900_rtc *rtc = dev_get_drvdata(dev); 139 struct nuc900_rtc *rtc = dev_get_drvdata(dev);
@@ -234,7 +220,6 @@ static struct rtc_class_ops nuc900_rtc_ops = {
234 .read_alarm = nuc900_rtc_read_alarm, 220 .read_alarm = nuc900_rtc_read_alarm,
235 .set_alarm = nuc900_rtc_set_alarm, 221 .set_alarm = nuc900_rtc_set_alarm,
236 .alarm_irq_enable = nuc900_alarm_irq_enable, 222 .alarm_irq_enable = nuc900_alarm_irq_enable,
237 .update_irq_enable = nuc900_update_irq_enable,
238}; 223};
239 224
240static int __devinit nuc900_rtc_probe(struct platform_device *pdev) 225static int __devinit nuc900_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index b4dbf3a319b3..de0dd7b1f146 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -135,44 +135,6 @@ static irqreturn_t rtc_irq(int irq, void *rtc)
135 return IRQ_HANDLED; 135 return IRQ_HANDLED;
136} 136}
137 137
138#ifdef CONFIG_RTC_INTF_DEV
139
140static int
141omap_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
142{
143 u8 reg;
144
145 switch (cmd) {
146 case RTC_UIE_OFF:
147 case RTC_UIE_ON:
148 break;
149 default:
150 return -ENOIOCTLCMD;
151 }
152
153 local_irq_disable();
154 rtc_wait_not_busy();
155 reg = rtc_read(OMAP_RTC_INTERRUPTS_REG);
156 switch (cmd) {
157 /* UIE = Update Interrupt Enable (1/second) */
158 case RTC_UIE_OFF:
159 reg &= ~OMAP_RTC_INTERRUPTS_IT_TIMER;
160 break;
161 case RTC_UIE_ON:
162 reg |= OMAP_RTC_INTERRUPTS_IT_TIMER;
163 break;
164 }
165 rtc_wait_not_busy();
166 rtc_write(reg, OMAP_RTC_INTERRUPTS_REG);
167 local_irq_enable();
168
169 return 0;
170}
171
172#else
173#define omap_rtc_ioctl NULL
174#endif
175
176static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 138static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
177{ 139{
178 u8 reg; 140 u8 reg;
@@ -313,7 +275,6 @@ static int omap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
313} 275}
314 276
315static struct rtc_class_ops omap_rtc_ops = { 277static struct rtc_class_ops omap_rtc_ops = {
316 .ioctl = omap_rtc_ioctl,
317 .read_time = omap_rtc_read_time, 278 .read_time = omap_rtc_read_time,
318 .set_time = omap_rtc_set_time, 279 .set_time = omap_rtc_set_time,
319 .read_alarm = omap_rtc_read_alarm, 280 .read_alarm = omap_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
index 25c0b3fd44f1..a633abc42896 100644
--- a/drivers/rtc/rtc-pcap.c
+++ b/drivers/rtc/rtc-pcap.c
@@ -131,18 +131,12 @@ static int pcap_rtc_alarm_irq_enable(struct device *dev, unsigned int en)
131 return pcap_rtc_irq_enable(dev, PCAP_IRQ_TODA, en); 131 return pcap_rtc_irq_enable(dev, PCAP_IRQ_TODA, en);
132} 132}
133 133
134static int pcap_rtc_update_irq_enable(struct device *dev, unsigned int en)
135{
136 return pcap_rtc_irq_enable(dev, PCAP_IRQ_1HZ, en);
137}
138
139static const struct rtc_class_ops pcap_rtc_ops = { 134static const struct rtc_class_ops pcap_rtc_ops = {
140 .read_time = pcap_rtc_read_time, 135 .read_time = pcap_rtc_read_time,
141 .read_alarm = pcap_rtc_read_alarm, 136 .read_alarm = pcap_rtc_read_alarm,
142 .set_alarm = pcap_rtc_set_alarm, 137 .set_alarm = pcap_rtc_set_alarm,
143 .set_mmss = pcap_rtc_set_mmss, 138 .set_mmss = pcap_rtc_set_mmss,
144 .alarm_irq_enable = pcap_rtc_alarm_irq_enable, 139 .alarm_irq_enable = pcap_rtc_alarm_irq_enable,
145 .update_irq_enable = pcap_rtc_update_irq_enable,
146}; 140};
147 141
148static int __devinit pcap_rtc_probe(struct platform_device *pdev) 142static int __devinit pcap_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c
index 16edf94ab42f..f90c574f9d05 100644
--- a/drivers/rtc/rtc-pcf50633.c
+++ b/drivers/rtc/rtc-pcf50633.c
@@ -106,25 +106,6 @@ pcf50633_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
106 return 0; 106 return 0;
107} 107}
108 108
109static int
110pcf50633_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
111{
112 struct pcf50633_rtc *rtc = dev_get_drvdata(dev);
113 int err;
114
115 if (enabled)
116 err = pcf50633_irq_unmask(rtc->pcf, PCF50633_IRQ_SECOND);
117 else
118 err = pcf50633_irq_mask(rtc->pcf, PCF50633_IRQ_SECOND);
119
120 if (err < 0)
121 return err;
122
123 rtc->second_enabled = enabled;
124
125 return 0;
126}
127
128static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm) 109static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm)
129{ 110{
130 struct pcf50633_rtc *rtc; 111 struct pcf50633_rtc *rtc;
@@ -262,8 +243,7 @@ static struct rtc_class_ops pcf50633_rtc_ops = {
262 .set_time = pcf50633_rtc_set_time, 243 .set_time = pcf50633_rtc_set_time,
263 .read_alarm = pcf50633_rtc_read_alarm, 244 .read_alarm = pcf50633_rtc_read_alarm,
264 .set_alarm = pcf50633_rtc_set_alarm, 245 .set_alarm = pcf50633_rtc_set_alarm,
265 .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable, 246 .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable,
266 .update_irq_enable = pcf50633_rtc_update_irq_enable,
267}; 247};
268 248
269static void pcf50633_rtc_irq(int irq, void *data) 249static void pcf50633_rtc_irq(int irq, void *data)
diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c
index bbdb2f02798a..d554368c9f57 100644
--- a/drivers/rtc/rtc-pl030.c
+++ b/drivers/rtc/rtc-pl030.c
@@ -35,11 +35,6 @@ static irqreturn_t pl030_interrupt(int irq, void *dev_id)
35 return IRQ_HANDLED; 35 return IRQ_HANDLED;
36} 36}
37 37
38static int pl030_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
39{
40 return -ENOIOCTLCMD;
41}
42
43static int pl030_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) 38static int pl030_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
44{ 39{
45 struct pl030_rtc *rtc = dev_get_drvdata(dev); 40 struct pl030_rtc *rtc = dev_get_drvdata(dev);
@@ -96,7 +91,6 @@ static int pl030_set_time(struct device *dev, struct rtc_time *tm)
96} 91}
97 92
98static const struct rtc_class_ops pl030_ops = { 93static const struct rtc_class_ops pl030_ops = {
99 .ioctl = pl030_ioctl,
100 .read_time = pl030_read_time, 94 .read_time = pl030_read_time,
101 .set_time = pl030_set_time, 95 .set_time = pl030_set_time,
102 .read_alarm = pl030_read_alarm, 96 .read_alarm = pl030_read_alarm,
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index b7a6690e5b35..d829ea63c4fb 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -293,57 +293,6 @@ static int pl031_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
293 return ret; 293 return ret;
294} 294}
295 295
296/* Periodic interrupt is only available in ST variants. */
297static int pl031_irq_set_state(struct device *dev, int enabled)
298{
299 struct pl031_local *ldata = dev_get_drvdata(dev);
300
301 if (enabled == 1) {
302 /* Clear any pending timer interrupt. */
303 writel(RTC_BIT_PI, ldata->base + RTC_ICR);
304
305 writel(readl(ldata->base + RTC_IMSC) | RTC_BIT_PI,
306 ldata->base + RTC_IMSC);
307
308 /* Now start the timer */
309 writel(readl(ldata->base + RTC_TCR) | RTC_TCR_EN,
310 ldata->base + RTC_TCR);
311
312 } else {
313 writel(readl(ldata->base + RTC_IMSC) & (~RTC_BIT_PI),
314 ldata->base + RTC_IMSC);
315
316 /* Also stop the timer */
317 writel(readl(ldata->base + RTC_TCR) & (~RTC_TCR_EN),
318 ldata->base + RTC_TCR);
319 }
320 /* Wait at least 1 RTC32 clock cycle to ensure next access
321 * to RTC_TCR will succeed.
322 */
323 udelay(40);
324
325 return 0;
326}
327
328static int pl031_irq_set_freq(struct device *dev, int freq)
329{
330 struct pl031_local *ldata = dev_get_drvdata(dev);
331
332 /* Cant set timer if it is already enabled */
333 if (readl(ldata->base + RTC_TCR) & RTC_TCR_EN) {
334 dev_err(dev, "can't change frequency while timer enabled\n");
335 return -EINVAL;
336 }
337
338 /* If self start bit in RTC_TCR is set timer will start here,
339 * but we never set that bit. Instead we start the timer when
340 * set_state is called with enabled == 1.
341 */
342 writel(RTC_TIMER_FREQ / freq, ldata->base + RTC_TLR);
343
344 return 0;
345}
346
347static int pl031_remove(struct amba_device *adev) 296static int pl031_remove(struct amba_device *adev)
348{ 297{
349 struct pl031_local *ldata = dev_get_drvdata(&adev->dev); 298 struct pl031_local *ldata = dev_get_drvdata(&adev->dev);
@@ -440,8 +389,6 @@ static struct rtc_class_ops stv1_pl031_ops = {
440 .read_alarm = pl031_read_alarm, 389 .read_alarm = pl031_read_alarm,
441 .set_alarm = pl031_set_alarm, 390 .set_alarm = pl031_set_alarm,
442 .alarm_irq_enable = pl031_alarm_irq_enable, 391 .alarm_irq_enable = pl031_alarm_irq_enable,
443 .irq_set_state = pl031_irq_set_state,
444 .irq_set_freq = pl031_irq_set_freq,
445}; 392};
446 393
447/* And the second ST derivative */ 394/* And the second ST derivative */
@@ -451,8 +398,6 @@ static struct rtc_class_ops stv2_pl031_ops = {
451 .read_alarm = pl031_stv2_read_alarm, 398 .read_alarm = pl031_stv2_read_alarm,
452 .set_alarm = pl031_stv2_set_alarm, 399 .set_alarm = pl031_stv2_set_alarm,
453 .alarm_irq_enable = pl031_alarm_irq_enable, 400 .alarm_irq_enable = pl031_alarm_irq_enable,
454 .irq_set_state = pl031_irq_set_state,
455 .irq_set_freq = pl031_irq_set_freq,
456}; 401};
457 402
458static struct amba_id pl031_ids[] = { 403static struct amba_id pl031_ids[] = {
diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
index 242bbf86c74a..0a59fda5c09d 100644
--- a/drivers/rtc/rtc-proc.c
+++ b/drivers/rtc/rtc-proc.c
@@ -69,6 +69,14 @@ static int rtc_proc_show(struct seq_file *seq, void *offset)
69 alrm.enabled ? "yes" : "no"); 69 alrm.enabled ? "yes" : "no");
70 seq_printf(seq, "alrm_pending\t: %s\n", 70 seq_printf(seq, "alrm_pending\t: %s\n",
71 alrm.pending ? "yes" : "no"); 71 alrm.pending ? "yes" : "no");
72 seq_printf(seq, "update IRQ enabled\t: %s\n",
73 (rtc->uie_rtctimer.enabled) ? "yes" : "no");
74 seq_printf(seq, "periodic IRQ enabled\t: %s\n",
75 (rtc->pie_enabled) ? "yes" : "no");
76 seq_printf(seq, "periodic IRQ frequency\t: %d\n",
77 rtc->irq_freq);
78 seq_printf(seq, "max user IRQ frequency\t: %d\n",
79 rtc->max_user_freq);
72 } 80 }
73 81
74 seq_printf(seq, "24hr\t\t: yes\n"); 82 seq_printf(seq, "24hr\t\t: yes\n");
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index 29e867a1aaa8..fc9f4991574b 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -209,32 +209,6 @@ static void pxa_rtc_release(struct device *dev)
209 free_irq(pxa_rtc->irq_1Hz, dev); 209 free_irq(pxa_rtc->irq_1Hz, dev);
210} 210}
211 211
212static int pxa_periodic_irq_set_freq(struct device *dev, int freq)
213{
214 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
215 int period_ms;
216
217 if (freq < 1 || freq > MAXFREQ_PERIODIC)
218 return -EINVAL;
219
220 period_ms = 1000 / freq;
221 rtc_writel(pxa_rtc, PIAR, period_ms);
222
223 return 0;
224}
225
226static int pxa_periodic_irq_set_state(struct device *dev, int enabled)
227{
228 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
229
230 if (enabled)
231 rtsr_set_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
232 else
233 rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
234
235 return 0;
236}
237
238static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled) 212static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled)
239{ 213{
240 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); 214 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -250,21 +224,6 @@ static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled)
250 return 0; 224 return 0;
251} 225}
252 226
253static int pxa_update_irq_enable(struct device *dev, unsigned int enabled)
254{
255 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
256
257 spin_lock_irq(&pxa_rtc->lock);
258
259 if (enabled)
260 rtsr_set_bits(pxa_rtc, RTSR_HZE);
261 else
262 rtsr_clear_bits(pxa_rtc, RTSR_HZE);
263
264 spin_unlock_irq(&pxa_rtc->lock);
265 return 0;
266}
267
268static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm) 227static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm)
269{ 228{
270 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev); 229 struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -346,10 +305,7 @@ static const struct rtc_class_ops pxa_rtc_ops = {
346 .read_alarm = pxa_rtc_read_alarm, 305 .read_alarm = pxa_rtc_read_alarm,
347 .set_alarm = pxa_rtc_set_alarm, 306 .set_alarm = pxa_rtc_set_alarm,
348 .alarm_irq_enable = pxa_alarm_irq_enable, 307 .alarm_irq_enable = pxa_alarm_irq_enable,
349 .update_irq_enable = pxa_update_irq_enable,
350 .proc = pxa_rtc_proc, 308 .proc = pxa_rtc_proc,
351 .irq_set_state = pxa_periodic_irq_set_state,
352 .irq_set_freq = pxa_periodic_irq_set_freq,
353}; 309};
354 310
355static int __init pxa_rtc_probe(struct platform_device *pdev) 311static int __init pxa_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 6aaa1550e3b1..85c1b848dd72 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -281,57 +281,6 @@ static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
281 return rs5c372_set_datetime(to_i2c_client(dev), tm); 281 return rs5c372_set_datetime(to_i2c_client(dev), tm);
282} 282}
283 283
284#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
285
286static int
287rs5c_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
288{
289 struct i2c_client *client = to_i2c_client(dev);
290 struct rs5c372 *rs5c = i2c_get_clientdata(client);
291 unsigned char buf;
292 int status, addr;
293
294 buf = rs5c->regs[RS5C_REG_CTRL1];
295 switch (cmd) {
296 case RTC_UIE_OFF:
297 case RTC_UIE_ON:
298 /* some 327a modes use a different IRQ pin for 1Hz irqs */
299 if (rs5c->type == rtc_rs5c372a
300 && (buf & RS5C372A_CTRL1_SL1))
301 return -ENOIOCTLCMD;
302 default:
303 return -ENOIOCTLCMD;
304 }
305
306 status = rs5c_get_regs(rs5c);
307 if (status < 0)
308 return status;
309
310 addr = RS5C_ADDR(RS5C_REG_CTRL1);
311 switch (cmd) {
312 case RTC_UIE_OFF: /* update off */
313 buf &= ~RS5C_CTRL1_CT_MASK;
314 break;
315 case RTC_UIE_ON: /* update on */
316 buf &= ~RS5C_CTRL1_CT_MASK;
317 buf |= RS5C_CTRL1_CT4;
318 break;
319 }
320
321 if (i2c_smbus_write_byte_data(client, addr, buf) < 0) {
322 printk(KERN_WARNING "%s: can't update alarm\n",
323 rs5c->rtc->name);
324 status = -EIO;
325 } else
326 rs5c->regs[RS5C_REG_CTRL1] = buf;
327
328 return status;
329}
330
331#else
332#define rs5c_rtc_ioctl NULL
333#endif
334
335 284
336static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 285static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
337{ 286{
@@ -480,7 +429,6 @@ static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
480 429
481static const struct rtc_class_ops rs5c372_rtc_ops = { 430static const struct rtc_class_ops rs5c372_rtc_ops = {
482 .proc = rs5c372_rtc_proc, 431 .proc = rs5c372_rtc_proc,
483 .ioctl = rs5c_rtc_ioctl,
484 .read_time = rs5c372_rtc_read_time, 432 .read_time = rs5c372_rtc_read_time,
485 .set_time = rs5c372_rtc_set_time, 433 .set_time = rs5c372_rtc_set_time,
486 .read_alarm = rs5c_read_alarm, 434 .read_alarm = rs5c_read_alarm,
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index af32a62e12a8..fde172fb2abe 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -424,37 +424,12 @@ static int rx8025_alarm_irq_enable(struct device *dev, unsigned int enabled)
424 return 0; 424 return 0;
425} 425}
426 426
427static int rx8025_irq_set_state(struct device *dev, int enabled)
428{
429 struct i2c_client *client = to_i2c_client(dev);
430 struct rx8025_data *rx8025 = i2c_get_clientdata(client);
431 int ctrl1;
432 int err;
433
434 if (client->irq <= 0)
435 return -ENXIO;
436
437 ctrl1 = rx8025->ctrl1 & ~RX8025_BIT_CTRL1_CT;
438 if (enabled)
439 ctrl1 |= RX8025_BIT_CTRL1_CT_1HZ;
440 if (ctrl1 != rx8025->ctrl1) {
441 rx8025->ctrl1 = ctrl1;
442 err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1,
443 rx8025->ctrl1);
444 if (err)
445 return err;
446 }
447
448 return 0;
449}
450
451static struct rtc_class_ops rx8025_rtc_ops = { 427static struct rtc_class_ops rx8025_rtc_ops = {
452 .read_time = rx8025_get_time, 428 .read_time = rx8025_get_time,
453 .set_time = rx8025_set_time, 429 .set_time = rx8025_set_time,
454 .read_alarm = rx8025_read_alarm, 430 .read_alarm = rx8025_read_alarm,
455 .set_alarm = rx8025_set_alarm, 431 .set_alarm = rx8025_set_alarm,
456 .alarm_irq_enable = rx8025_alarm_irq_enable, 432 .alarm_irq_enable = rx8025_alarm_irq_enable,
457 .irq_set_state = rx8025_irq_set_state,
458}; 433};
459 434
460/* 435/*
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index cf953ecbfca9..714964913e5e 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -77,47 +77,18 @@ static irqreturn_t s3c_rtc_tickirq(int irq, void *id)
77} 77}
78 78
79/* Update control registers */ 79/* Update control registers */
80static void s3c_rtc_setaie(int to) 80static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
81{ 81{
82 unsigned int tmp; 82 unsigned int tmp;
83 83
84 pr_debug("%s: aie=%d\n", __func__, to); 84 pr_debug("%s: aie=%d\n", __func__, enabled);
85 85
86 tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN; 86 tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
87 87
88 if (to) 88 if (enabled)
89 tmp |= S3C2410_RTCALM_ALMEN; 89 tmp |= S3C2410_RTCALM_ALMEN;
90 90
91 writeb(tmp, s3c_rtc_base + S3C2410_RTCALM); 91 writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
92}
93
94static int s3c_rtc_setpie(struct device *dev, int enabled)
95{
96 unsigned int tmp;
97
98 pr_debug("%s: pie=%d\n", __func__, enabled);
99
100 spin_lock_irq(&s3c_rtc_pie_lock);
101
102 if (s3c_rtc_cpu_type == TYPE_S3C64XX) {
103 tmp = readw(s3c_rtc_base + S3C2410_RTCCON);
104 tmp &= ~S3C64XX_RTCCON_TICEN;
105
106 if (enabled)
107 tmp |= S3C64XX_RTCCON_TICEN;
108
109 writew(tmp, s3c_rtc_base + S3C2410_RTCCON);
110 } else {
111 tmp = readb(s3c_rtc_base + S3C2410_TICNT);
112 tmp &= ~S3C2410_TICNT_ENABLE;
113
114 if (enabled)
115 tmp |= S3C2410_TICNT_ENABLE;
116
117 writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
118 }
119
120 spin_unlock_irq(&s3c_rtc_pie_lock);
121 92
122 return 0; 93 return 0;
123} 94}
@@ -308,7 +279,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
308 279
309 writeb(alrm_en, base + S3C2410_RTCALM); 280 writeb(alrm_en, base + S3C2410_RTCALM);
310 281
311 s3c_rtc_setaie(alrm->enabled); 282 s3c_rtc_setaie(dev, alrm->enabled);
312 283
313 return 0; 284 return 0;
314} 285}
@@ -377,8 +348,6 @@ static const struct rtc_class_ops s3c_rtcops = {
377 .set_time = s3c_rtc_settime, 348 .set_time = s3c_rtc_settime,
378 .read_alarm = s3c_rtc_getalarm, 349 .read_alarm = s3c_rtc_getalarm,
379 .set_alarm = s3c_rtc_setalarm, 350 .set_alarm = s3c_rtc_setalarm,
380 .irq_set_freq = s3c_rtc_setfreq,
381 .irq_set_state = s3c_rtc_setpie,
382 .proc = s3c_rtc_proc, 351 .proc = s3c_rtc_proc,
383 .alarm_irq_enable = s3c_rtc_setaie, 352 .alarm_irq_enable = s3c_rtc_setaie,
384}; 353};
@@ -440,7 +409,7 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
440 rtc_device_unregister(rtc); 409 rtc_device_unregister(rtc);
441 410
442 s3c_rtc_setpie(&dev->dev, 0); 411 s3c_rtc_setpie(&dev->dev, 0);
443 s3c_rtc_setaie(0); 412 s3c_rtc_setaie(&dev->dev, 0);
444 413
445 clk_disable(rtc_clk); 414 clk_disable(rtc_clk);
446 clk_put(rtc_clk); 415 clk_put(rtc_clk);
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 5dfe5ffcb0d3..0b40bb88a884 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -43,7 +43,6 @@
43#define RTC_DEF_TRIM 0 43#define RTC_DEF_TRIM 0
44 44
45static const unsigned long RTC_FREQ = 1024; 45static const unsigned long RTC_FREQ = 1024;
46static unsigned long timer_freq;
47static struct rtc_time rtc_alarm; 46static struct rtc_time rtc_alarm;
48static DEFINE_SPINLOCK(sa1100_rtc_lock); 47static DEFINE_SPINLOCK(sa1100_rtc_lock);
49 48
@@ -156,114 +155,11 @@ static irqreturn_t sa1100_rtc_interrupt(int irq, void *dev_id)
156 return IRQ_HANDLED; 155 return IRQ_HANDLED;
157} 156}
158 157
159static int sa1100_irq_set_freq(struct device *dev, int freq)
160{
161 if (freq < 1 || freq > timer_freq) {
162 return -EINVAL;
163 } else {
164 struct rtc_device *rtc = (struct rtc_device *)dev;
165
166 rtc->irq_freq = freq;
167
168 return 0;
169 }
170}
171
172static int rtc_timer1_count;
173
174static int sa1100_irq_set_state(struct device *dev, int enabled)
175{
176 spin_lock_irq(&sa1100_rtc_lock);
177 if (enabled) {
178 struct rtc_device *rtc = (struct rtc_device *)dev;
179
180 OSMR1 = timer_freq / rtc->irq_freq + OSCR;
181 OIER |= OIER_E1;
182 rtc_timer1_count = 1;
183 } else {
184 OIER &= ~OIER_E1;
185 }
186 spin_unlock_irq(&sa1100_rtc_lock);
187
188 return 0;
189}
190
191static inline int sa1100_timer1_retrigger(struct rtc_device *rtc)
192{
193 unsigned long diff;
194 unsigned long period = timer_freq / rtc->irq_freq;
195
196 spin_lock_irq(&sa1100_rtc_lock);
197
198 do {
199 OSMR1 += period;
200 diff = OSMR1 - OSCR;
201 /* If OSCR > OSMR1, diff is a very large number (unsigned
202 * math). This means we have a lost interrupt. */
203 } while (diff > period);
204 OIER |= OIER_E1;
205
206 spin_unlock_irq(&sa1100_rtc_lock);
207
208 return 0;
209}
210
211static irqreturn_t timer1_interrupt(int irq, void *dev_id)
212{
213 struct platform_device *pdev = to_platform_device(dev_id);
214 struct rtc_device *rtc = platform_get_drvdata(pdev);
215
216 /*
217 * If we match for the first time, rtc_timer1_count will be 1.
218 * Otherwise, we wrapped around (very unlikely but
219 * still possible) so compute the amount of missed periods.
220 * The match reg is updated only when the data is actually retrieved
221 * to avoid unnecessary interrupts.
222 */
223 OSSR = OSSR_M1; /* clear match on timer1 */
224
225 rtc_update_irq(rtc, rtc_timer1_count, RTC_PF | RTC_IRQF);
226
227 if (rtc_timer1_count == 1)
228 rtc_timer1_count =
229 (rtc->irq_freq * ((1 << 30) / (timer_freq >> 2)));
230
231 /* retrigger. */
232 sa1100_timer1_retrigger(rtc);
233
234 return IRQ_HANDLED;
235}
236
237static int sa1100_rtc_read_callback(struct device *dev, int data)
238{
239 if (data & RTC_PF) {
240 struct rtc_device *rtc = (struct rtc_device *)dev;
241
242 /* interpolate missed periods and set match for the next */
243 unsigned long period = timer_freq / rtc->irq_freq;
244 unsigned long oscr = OSCR;
245 unsigned long osmr1 = OSMR1;
246 unsigned long missed = (oscr - osmr1)/period;
247 data += missed << 8;
248 OSSR = OSSR_M1; /* clear match on timer 1 */
249 OSMR1 = osmr1 + (missed + 1)*period;
250 /* Ensure we didn't miss another match in the mean time.
251 * Here we compare (match - OSCR) 8 instead of 0 --
252 * see comment in pxa_timer_interrupt() for explanation.
253 */
254 while ((signed long)((osmr1 = OSMR1) - OSCR) <= 8) {
255 data += 0x100;
256 OSSR = OSSR_M1; /* clear match on timer 1 */
257 OSMR1 = osmr1 + period;
258 }
259 }
260 return data;
261}
262
263static int sa1100_rtc_open(struct device *dev) 158static int sa1100_rtc_open(struct device *dev)
264{ 159{
265 int ret; 160 int ret;
266 struct rtc_device *rtc = (struct rtc_device *)dev; 161 struct platform_device *plat_dev = to_platform_device(dev);
162 struct rtc_device *rtc = platform_get_drvdata(plat_dev);
267 163
268 ret = request_irq(IRQ_RTC1Hz, sa1100_rtc_interrupt, IRQF_DISABLED, 164 ret = request_irq(IRQ_RTC1Hz, sa1100_rtc_interrupt, IRQF_DISABLED,
269 "rtc 1Hz", dev); 165 "rtc 1Hz", dev);
@@ -277,19 +173,11 @@ static int sa1100_rtc_open(struct device *dev)
277 dev_err(dev, "IRQ %d already in use.\n", IRQ_RTCAlrm); 173 dev_err(dev, "IRQ %d already in use.\n", IRQ_RTCAlrm);
278 goto fail_ai; 174 goto fail_ai;
279 } 175 }
280 ret = request_irq(IRQ_OST1, timer1_interrupt, IRQF_DISABLED,
281 "rtc timer", dev);
282 if (ret) {
283 dev_err(dev, "IRQ %d already in use.\n", IRQ_OST1);
284 goto fail_pi;
285 }
286 rtc->max_user_freq = RTC_FREQ; 176 rtc->max_user_freq = RTC_FREQ;
287 sa1100_irq_set_freq(dev, RTC_FREQ); 177 rtc_irq_set_freq(rtc, NULL, RTC_FREQ);
288 178
289 return 0; 179 return 0;
290 180
291 fail_pi:
292 free_irq(IRQ_RTCAlrm, dev);
293 fail_ai: 181 fail_ai:
294 free_irq(IRQ_RTC1Hz, dev); 182 free_irq(IRQ_RTC1Hz, dev);
295 fail_ui: 183 fail_ui:
@@ -304,30 +192,10 @@ static void sa1100_rtc_release(struct device *dev)
304 OSSR = OSSR_M1; 192 OSSR = OSSR_M1;
305 spin_unlock_irq(&sa1100_rtc_lock); 193 spin_unlock_irq(&sa1100_rtc_lock);
306 194
307 free_irq(IRQ_OST1, dev);
308 free_irq(IRQ_RTCAlrm, dev); 195 free_irq(IRQ_RTCAlrm, dev);
309 free_irq(IRQ_RTC1Hz, dev); 196 free_irq(IRQ_RTC1Hz, dev);
310} 197}
311 198
312
313static int sa1100_rtc_ioctl(struct device *dev, unsigned int cmd,
314 unsigned long arg)
315{
316 switch (cmd) {
317 case RTC_UIE_OFF:
318 spin_lock_irq(&sa1100_rtc_lock);
319 RTSR &= ~RTSR_HZE;
320 spin_unlock_irq(&sa1100_rtc_lock);
321 return 0;
322 case RTC_UIE_ON:
323 spin_lock_irq(&sa1100_rtc_lock);
324 RTSR |= RTSR_HZE;
325 spin_unlock_irq(&sa1100_rtc_lock);
326 return 0;
327 }
328 return -ENOIOCTLCMD;
329}
330
331static int sa1100_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 199static int sa1100_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
332{ 200{
333 spin_lock_irq(&sa1100_rtc_lock); 201 spin_lock_irq(&sa1100_rtc_lock);
@@ -386,31 +254,20 @@ static int sa1100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
386 254
387static int sa1100_rtc_proc(struct device *dev, struct seq_file *seq) 255static int sa1100_rtc_proc(struct device *dev, struct seq_file *seq)
388{ 256{
389 struct rtc_device *rtc = (struct rtc_device *)dev; 257 seq_printf(seq, "trim/divider\t\t: 0x%08x\n", (u32) RTTR);
390 258 seq_printf(seq, "RTSR\t\t\t: 0x%08x\n", (u32)RTSR);
391 seq_printf(seq, "trim/divider\t: 0x%08x\n", (u32) RTTR);
392 seq_printf(seq, "update_IRQ\t: %s\n",
393 (RTSR & RTSR_HZE) ? "yes" : "no");
394 seq_printf(seq, "periodic_IRQ\t: %s\n",
395 (OIER & OIER_E1) ? "yes" : "no");
396 seq_printf(seq, "periodic_freq\t: %d\n", rtc->irq_freq);
397 seq_printf(seq, "RTSR\t\t: 0x%08x\n", (u32)RTSR);
398 259
399 return 0; 260 return 0;
400} 261}
401 262
402static const struct rtc_class_ops sa1100_rtc_ops = { 263static const struct rtc_class_ops sa1100_rtc_ops = {
403 .open = sa1100_rtc_open, 264 .open = sa1100_rtc_open,
404 .read_callback = sa1100_rtc_read_callback,
405 .release = sa1100_rtc_release, 265 .release = sa1100_rtc_release,
406 .ioctl = sa1100_rtc_ioctl,
407 .read_time = sa1100_rtc_read_time, 266 .read_time = sa1100_rtc_read_time,
408 .set_time = sa1100_rtc_set_time, 267 .set_time = sa1100_rtc_set_time,
409 .read_alarm = sa1100_rtc_read_alarm, 268 .read_alarm = sa1100_rtc_read_alarm,
410 .set_alarm = sa1100_rtc_set_alarm, 269 .set_alarm = sa1100_rtc_set_alarm,
411 .proc = sa1100_rtc_proc, 270 .proc = sa1100_rtc_proc,
412 .irq_set_freq = sa1100_irq_set_freq,
413 .irq_set_state = sa1100_irq_set_state,
414 .alarm_irq_enable = sa1100_rtc_alarm_irq_enable, 271 .alarm_irq_enable = sa1100_rtc_alarm_irq_enable,
415}; 272};
416 273
@@ -418,8 +275,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
418{ 275{
419 struct rtc_device *rtc; 276 struct rtc_device *rtc;
420 277
421 timer_freq = get_clock_tick_rate();
422
423 /* 278 /*
424 * According to the manual we should be able to let RTTR be zero 279 * According to the manual we should be able to let RTTR be zero
425 * and then a default diviser for a 32.768KHz clock is used. 280 * and then a default diviser for a 32.768KHz clock is used.
@@ -445,11 +300,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
445 300
446 platform_set_drvdata(pdev, rtc); 301 platform_set_drvdata(pdev, rtc);
447 302
448 /* Set the irq_freq */
449 /*TODO: Find out who is messing with this value after we initialize
450 * it here.*/
451 rtc->irq_freq = RTC_FREQ;
452
453 /* Fix for a nasty initialization problem the in SA11xx RTSR register. 303 /* Fix for a nasty initialization problem the in SA11xx RTSR register.
454 * See also the comments in sa1100_rtc_interrupt(). 304 * See also the comments in sa1100_rtc_interrupt().
455 * 305 *
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 93314a9e7fa9..e55dc1ac83ab 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -344,27 +344,6 @@ static inline void sh_rtc_setcie(struct device *dev, unsigned int enable)
344 spin_unlock_irq(&rtc->lock); 344 spin_unlock_irq(&rtc->lock);
345} 345}
346 346
347static int sh_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
348{
349 struct sh_rtc *rtc = dev_get_drvdata(dev);
350 unsigned int ret = 0;
351
352 switch (cmd) {
353 case RTC_UIE_OFF:
354 rtc->periodic_freq &= ~PF_OXS;
355 sh_rtc_setcie(dev, 0);
356 break;
357 case RTC_UIE_ON:
358 rtc->periodic_freq |= PF_OXS;
359 sh_rtc_setcie(dev, 1);
360 break;
361 default:
362 ret = -ENOIOCTLCMD;
363 }
364
365 return ret;
366}
367
368static int sh_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) 347static int sh_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
369{ 348{
370 sh_rtc_setaie(dev, enabled); 349 sh_rtc_setaie(dev, enabled);
@@ -598,13 +577,10 @@ static int sh_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
598} 577}
599 578
600static struct rtc_class_ops sh_rtc_ops = { 579static struct rtc_class_ops sh_rtc_ops = {
601 .ioctl = sh_rtc_ioctl,
602 .read_time = sh_rtc_read_time, 580 .read_time = sh_rtc_read_time,
603 .set_time = sh_rtc_set_time, 581 .set_time = sh_rtc_set_time,
604 .read_alarm = sh_rtc_read_alarm, 582 .read_alarm = sh_rtc_read_alarm,
605 .set_alarm = sh_rtc_set_alarm, 583 .set_alarm = sh_rtc_set_alarm,
606 .irq_set_state = sh_rtc_irq_set_state,
607 .irq_set_freq = sh_rtc_irq_set_freq,
608 .proc = sh_rtc_proc, 584 .proc = sh_rtc_proc,
609 .alarm_irq_enable = sh_rtc_alarm_irq_enable, 585 .alarm_irq_enable = sh_rtc_alarm_irq_enable,
610}; 586};
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index 7e7d0c806f2d..572e9534b591 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -115,19 +115,6 @@ static int stmp3xxx_alarm_irq_enable(struct device *dev, unsigned int enabled)
115 return 0; 115 return 0;
116} 116}
117 117
118static int stmp3xxx_update_irq_enable(struct device *dev, unsigned int enabled)
119{
120 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
121
122 if (enabled)
123 stmp3xxx_setl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
124 rtc_data->io + HW_RTC_CTRL);
125 else
126 stmp3xxx_clearl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
127 rtc_data->io + HW_RTC_CTRL);
128 return 0;
129}
130
131static int stmp3xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm) 118static int stmp3xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
132{ 119{
133 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); 120 struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
@@ -149,8 +136,6 @@ static int stmp3xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
149static struct rtc_class_ops stmp3xxx_rtc_ops = { 136static struct rtc_class_ops stmp3xxx_rtc_ops = {
150 .alarm_irq_enable = 137 .alarm_irq_enable =
151 stmp3xxx_alarm_irq_enable, 138 stmp3xxx_alarm_irq_enable,
152 .update_irq_enable =
153 stmp3xxx_update_irq_enable,
154 .read_time = stmp3xxx_rtc_gettime, 139 .read_time = stmp3xxx_rtc_gettime,
155 .set_mmss = stmp3xxx_rtc_set_mmss, 140 .set_mmss = stmp3xxx_rtc_set_mmss,
156 .read_alarm = stmp3xxx_rtc_read_alarm, 141 .read_alarm = stmp3xxx_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index a82d6fe97076..7e96254bd365 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -78,11 +78,16 @@ static ssize_t test_irq_store(struct device *dev,
78 struct rtc_device *rtc = platform_get_drvdata(plat_dev); 78 struct rtc_device *rtc = platform_get_drvdata(plat_dev);
79 79
80 retval = count; 80 retval = count;
81 if (strncmp(buf, "tick", 4) == 0) 81 if (strncmp(buf, "tick", 4) == 0 && rtc->pie_enabled)
82 rtc_update_irq(rtc, 1, RTC_PF | RTC_IRQF); 82 rtc_update_irq(rtc, 1, RTC_PF | RTC_IRQF);
83 else if (strncmp(buf, "alarm", 5) == 0) 83 else if (strncmp(buf, "alarm", 5) == 0) {
84 rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF); 84 struct rtc_wkalrm alrm;
85 else if (strncmp(buf, "update", 6) == 0) 85 int err = rtc_read_alarm(rtc, &alrm);
86
87 if (!err && alrm.enabled)
88 rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF);
89
90 } else if (strncmp(buf, "update", 6) == 0 && rtc->uie_rtctimer.enabled)
86 rtc_update_irq(rtc, 1, RTC_UF | RTC_IRQF); 91 rtc_update_irq(rtc, 1, RTC_UF | RTC_IRQF);
87 else 92 else
88 retval = -EINVAL; 93 retval = -EINVAL;
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index ed1b86828124..f9a2799c44d6 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -213,18 +213,6 @@ static int twl_rtc_alarm_irq_enable(struct device *dev, unsigned enabled)
213 return ret; 213 return ret;
214} 214}
215 215
216static int twl_rtc_update_irq_enable(struct device *dev, unsigned enabled)
217{
218 int ret;
219
220 if (enabled)
221 ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
222 else
223 ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
224
225 return ret;
226}
227
228/* 216/*
229 * Gets current TWL RTC time and date parameters. 217 * Gets current TWL RTC time and date parameters.
230 * 218 *
@@ -433,7 +421,6 @@ static struct rtc_class_ops twl_rtc_ops = {
433 .read_alarm = twl_rtc_read_alarm, 421 .read_alarm = twl_rtc_read_alarm,
434 .set_alarm = twl_rtc_set_alarm, 422 .set_alarm = twl_rtc_set_alarm,
435 .alarm_irq_enable = twl_rtc_alarm_irq_enable, 423 .alarm_irq_enable = twl_rtc_alarm_irq_enable,
436 .update_irq_enable = twl_rtc_update_irq_enable,
437}; 424};
438 425
439/*----------------------------------------------------------------------*/ 426/*----------------------------------------------------------------------*/
diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 769190ac6d11..c5698cda366a 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -207,36 +207,6 @@ static int vr41xx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
207 return 0; 207 return 0;
208} 208}
209 209
210static int vr41xx_rtc_irq_set_freq(struct device *dev, int freq)
211{
212 u64 count;
213
214 if (!is_power_of_2(freq))
215 return -EINVAL;
216 count = RTC_FREQUENCY;
217 do_div(count, freq);
218
219 spin_lock_irq(&rtc_lock);
220
221 periodic_count = count;
222 rtc1_write(RTCL1LREG, periodic_count);
223 rtc1_write(RTCL1HREG, periodic_count >> 16);
224
225 spin_unlock_irq(&rtc_lock);
226
227 return 0;
228}
229
230static int vr41xx_rtc_irq_set_state(struct device *dev, int enabled)
231{
232 if (enabled)
233 enable_irq(pie_irq);
234 else
235 disable_irq(pie_irq);
236
237 return 0;
238}
239
240static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) 210static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
241{ 211{
242 switch (cmd) { 212 switch (cmd) {
@@ -308,8 +278,6 @@ static const struct rtc_class_ops vr41xx_rtc_ops = {
308 .set_time = vr41xx_rtc_set_time, 278 .set_time = vr41xx_rtc_set_time,
309 .read_alarm = vr41xx_rtc_read_alarm, 279 .read_alarm = vr41xx_rtc_read_alarm,
310 .set_alarm = vr41xx_rtc_set_alarm, 280 .set_alarm = vr41xx_rtc_set_alarm,
311 .irq_set_freq = vr41xx_rtc_irq_set_freq,
312 .irq_set_state = vr41xx_rtc_irq_set_state,
313}; 281};
314 282
315static int __devinit rtc_probe(struct platform_device *pdev) 283static int __devinit rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 82931dc65c0b..bdc909bd56da 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -315,21 +315,6 @@ static int wm831x_rtc_alarm_irq_enable(struct device *dev,
315 return wm831x_rtc_stop_alarm(wm831x_rtc); 315 return wm831x_rtc_stop_alarm(wm831x_rtc);
316} 316}
317 317
318static int wm831x_rtc_update_irq_enable(struct device *dev,
319 unsigned int enabled)
320{
321 struct wm831x_rtc *wm831x_rtc = dev_get_drvdata(dev);
322 int val;
323
324 if (enabled)
325 val = 1 << WM831X_RTC_PINT_FREQ_SHIFT;
326 else
327 val = 0;
328
329 return wm831x_set_bits(wm831x_rtc->wm831x, WM831X_RTC_CONTROL,
330 WM831X_RTC_PINT_FREQ_MASK, val);
331}
332
333static irqreturn_t wm831x_alm_irq(int irq, void *data) 318static irqreturn_t wm831x_alm_irq(int irq, void *data)
334{ 319{
335 struct wm831x_rtc *wm831x_rtc = data; 320 struct wm831x_rtc *wm831x_rtc = data;
@@ -354,7 +339,6 @@ static const struct rtc_class_ops wm831x_rtc_ops = {
354 .read_alarm = wm831x_rtc_readalarm, 339 .read_alarm = wm831x_rtc_readalarm,
355 .set_alarm = wm831x_rtc_setalarm, 340 .set_alarm = wm831x_rtc_setalarm,
356 .alarm_irq_enable = wm831x_rtc_alarm_irq_enable, 341 .alarm_irq_enable = wm831x_rtc_alarm_irq_enable,
357 .update_irq_enable = wm831x_rtc_update_irq_enable,
358}; 342};
359 343
360#ifdef CONFIG_PM 344#ifdef CONFIG_PM
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index 3d0dc76b38af..66421426e404 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -302,26 +302,6 @@ static int wm8350_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
302 return ret; 302 return ret;
303} 303}
304 304
305static int wm8350_rtc_update_irq_enable(struct device *dev,
306 unsigned int enabled)
307{
308 struct wm8350 *wm8350 = dev_get_drvdata(dev);
309
310 /* Suppress duplicate changes since genirq nests enable and
311 * disable calls. */
312 if (enabled == wm8350->rtc.update_enabled)
313 return 0;
314
315 if (enabled)
316 wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC);
317 else
318 wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC);
319
320 wm8350->rtc.update_enabled = enabled;
321
322 return 0;
323}
324
325static irqreturn_t wm8350_rtc_alarm_handler(int irq, void *data) 305static irqreturn_t wm8350_rtc_alarm_handler(int irq, void *data)
326{ 306{
327 struct wm8350 *wm8350 = data; 307 struct wm8350 *wm8350 = data;
@@ -357,7 +337,6 @@ static const struct rtc_class_ops wm8350_rtc_ops = {
357 .read_alarm = wm8350_rtc_readalarm, 337 .read_alarm = wm8350_rtc_readalarm,
358 .set_alarm = wm8350_rtc_setalarm, 338 .set_alarm = wm8350_rtc_setalarm,
359 .alarm_irq_enable = wm8350_rtc_alarm_irq_enable, 339 .alarm_irq_enable = wm8350_rtc_alarm_irq_enable,
360 .update_irq_enable = wm8350_rtc_update_irq_enable,
361}; 340};
362 341
363#ifdef CONFIG_PM 342#ifdef CONFIG_PM
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index c881a14fa5dd..1f6a4d894e73 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -62,8 +62,8 @@ static int xpram_devs;
62/* 62/*
63 * Parameter parsing functions. 63 * Parameter parsing functions.
64 */ 64 */
65static int __initdata devs = XPRAM_DEVS; 65static int devs = XPRAM_DEVS;
66static char __initdata *sizes[XPRAM_MAX_DEVS]; 66static char *sizes[XPRAM_MAX_DEVS];
67 67
68module_param(devs, int, 0); 68module_param(devs, int, 0);
69module_param_array(sizes, charp, NULL, 0); 69module_param_array(sizes, charp, NULL, 0);
diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 8cd58e412b5e..5ad44daef73b 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -460,7 +460,8 @@ kbd_ioctl(struct kbd_data *kbd, struct file *file,
460 unsigned int cmd, unsigned long arg) 460 unsigned int cmd, unsigned long arg)
461{ 461{
462 void __user *argp; 462 void __user *argp;
463 int ct, perm; 463 unsigned int ct;
464 int perm;
464 465
465 argp = (void __user *)arg; 466 argp = (void __user *)arg;
466 467
diff --git a/drivers/s390/char/tape.h b/drivers/s390/char/tape.h
index 7a242f073632..267b54e8ff5a 100644
--- a/drivers/s390/char/tape.h
+++ b/drivers/s390/char/tape.h
@@ -280,6 +280,14 @@ tape_do_io_free(struct tape_device *device, struct tape_request *request)
280 return rc; 280 return rc;
281} 281}
282 282
283static inline void
284tape_do_io_async_free(struct tape_device *device, struct tape_request *request)
285{
286 request->callback = (void *) tape_free_request;
287 request->callback_data = NULL;
288 tape_do_io_async(device, request);
289}
290
283extern int tape_oper_handler(int irq, int status); 291extern int tape_oper_handler(int irq, int status);
284extern void tape_noper_handler(int irq, int status); 292extern void tape_noper_handler(int irq, int status);
285extern int tape_open(struct tape_device *); 293extern int tape_open(struct tape_device *);
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index c17f35b6136a..c26511171ffe 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -53,23 +53,11 @@ static void tape_34xx_delete_sbid_from(struct tape_device *, int);
53 * Medium sense for 34xx tapes. There is no 'real' medium sense call. 53 * Medium sense for 34xx tapes. There is no 'real' medium sense call.
54 * So we just do a normal sense. 54 * So we just do a normal sense.
55 */ 55 */
56static int 56static void __tape_34xx_medium_sense(struct tape_request *request)
57tape_34xx_medium_sense(struct tape_device *device)
58{ 57{
59 struct tape_request *request; 58 struct tape_device *device = request->device;
60 unsigned char *sense; 59 unsigned char *sense;
61 int rc;
62
63 request = tape_alloc_request(1, 32);
64 if (IS_ERR(request)) {
65 DBF_EXCEPTION(6, "MSEN fail\n");
66 return PTR_ERR(request);
67 }
68
69 request->op = TO_MSEN;
70 tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
71 60
72 rc = tape_do_io_interruptible(device, request);
73 if (request->rc == 0) { 61 if (request->rc == 0) {
74 sense = request->cpdata; 62 sense = request->cpdata;
75 63
@@ -88,15 +76,47 @@ tape_34xx_medium_sense(struct tape_device *device)
88 device->tape_generic_status |= GMT_WR_PROT(~0); 76 device->tape_generic_status |= GMT_WR_PROT(~0);
89 else 77 else
90 device->tape_generic_status &= ~GMT_WR_PROT(~0); 78 device->tape_generic_status &= ~GMT_WR_PROT(~0);
91 } else { 79 } else
92 DBF_EVENT(4, "tape_34xx: medium sense failed with rc=%d\n", 80 DBF_EVENT(4, "tape_34xx: medium sense failed with rc=%d\n",
93 request->rc); 81 request->rc);
94 }
95 tape_free_request(request); 82 tape_free_request(request);
83}
84
85static int tape_34xx_medium_sense(struct tape_device *device)
86{
87 struct tape_request *request;
88 int rc;
89
90 request = tape_alloc_request(1, 32);
91 if (IS_ERR(request)) {
92 DBF_EXCEPTION(6, "MSEN fail\n");
93 return PTR_ERR(request);
94 }
96 95
96 request->op = TO_MSEN;
97 tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
98 rc = tape_do_io_interruptible(device, request);
99 __tape_34xx_medium_sense(request);
97 return rc; 100 return rc;
98} 101}
99 102
103static void tape_34xx_medium_sense_async(struct tape_device *device)
104{
105 struct tape_request *request;
106
107 request = tape_alloc_request(1, 32);
108 if (IS_ERR(request)) {
109 DBF_EXCEPTION(6, "MSEN fail\n");
110 return;
111 }
112
113 request->op = TO_MSEN;
114 tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
115 request->callback = (void *) __tape_34xx_medium_sense;
116 request->callback_data = NULL;
117 tape_do_io_async(device, request);
118}
119
100struct tape_34xx_work { 120struct tape_34xx_work {
101 struct tape_device *device; 121 struct tape_device *device;
102 enum tape_op op; 122 enum tape_op op;
@@ -109,6 +129,9 @@ struct tape_34xx_work {
109 * is inserted but cannot call tape_do_io* from an interrupt context. 129 * is inserted but cannot call tape_do_io* from an interrupt context.
110 * Maybe that's useful for other actions we want to start from the 130 * Maybe that's useful for other actions we want to start from the
111 * interrupt handler. 131 * interrupt handler.
132 * Note: the work handler is called by the system work queue. The tape
133 * commands started by the handler need to be asynchrounous, otherwise
134 * a deadlock can occur e.g. in case of a deferred cc=1 (see __tape_do_irq).
112 */ 135 */
113static void 136static void
114tape_34xx_work_handler(struct work_struct *work) 137tape_34xx_work_handler(struct work_struct *work)
@@ -119,7 +142,7 @@ tape_34xx_work_handler(struct work_struct *work)
119 142
120 switch(p->op) { 143 switch(p->op) {
121 case TO_MSEN: 144 case TO_MSEN:
122 tape_34xx_medium_sense(device); 145 tape_34xx_medium_sense_async(device);
123 break; 146 break;
124 default: 147 default:
125 DBF_EVENT(3, "T34XX: internal error: unknown work\n"); 148 DBF_EVENT(3, "T34XX: internal error: unknown work\n");
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index fbe361fcd2c0..de2e99e0a71b 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -329,17 +329,17 @@ out:
329/* 329/*
330 * Enable encryption 330 * Enable encryption
331 */ 331 */
332static int tape_3592_enable_crypt(struct tape_device *device) 332static struct tape_request *__tape_3592_enable_crypt(struct tape_device *device)
333{ 333{
334 struct tape_request *request; 334 struct tape_request *request;
335 char *data; 335 char *data;
336 336
337 DBF_EVENT(6, "tape_3592_enable_crypt\n"); 337 DBF_EVENT(6, "tape_3592_enable_crypt\n");
338 if (!crypt_supported(device)) 338 if (!crypt_supported(device))
339 return -ENOSYS; 339 return ERR_PTR(-ENOSYS);
340 request = tape_alloc_request(2, 72); 340 request = tape_alloc_request(2, 72);
341 if (IS_ERR(request)) 341 if (IS_ERR(request))
342 return PTR_ERR(request); 342 return request;
343 data = request->cpdata; 343 data = request->cpdata;
344 memset(data,0,72); 344 memset(data,0,72);
345 345
@@ -354,23 +354,42 @@ static int tape_3592_enable_crypt(struct tape_device *device)
354 request->op = TO_CRYPT_ON; 354 request->op = TO_CRYPT_ON;
355 tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data); 355 tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data);
356 tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36); 356 tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36);
357 return request;
358}
359
360static int tape_3592_enable_crypt(struct tape_device *device)
361{
362 struct tape_request *request;
363
364 request = __tape_3592_enable_crypt(device);
365 if (IS_ERR(request))
366 return PTR_ERR(request);
357 return tape_do_io_free(device, request); 367 return tape_do_io_free(device, request);
358} 368}
359 369
370static void tape_3592_enable_crypt_async(struct tape_device *device)
371{
372 struct tape_request *request;
373
374 request = __tape_3592_enable_crypt(device);
375 if (!IS_ERR(request))
376 tape_do_io_async_free(device, request);
377}
378
360/* 379/*
361 * Disable encryption 380 * Disable encryption
362 */ 381 */
363static int tape_3592_disable_crypt(struct tape_device *device) 382static struct tape_request *__tape_3592_disable_crypt(struct tape_device *device)
364{ 383{
365 struct tape_request *request; 384 struct tape_request *request;
366 char *data; 385 char *data;
367 386
368 DBF_EVENT(6, "tape_3592_disable_crypt\n"); 387 DBF_EVENT(6, "tape_3592_disable_crypt\n");
369 if (!crypt_supported(device)) 388 if (!crypt_supported(device))
370 return -ENOSYS; 389 return ERR_PTR(-ENOSYS);
371 request = tape_alloc_request(2, 72); 390 request = tape_alloc_request(2, 72);
372 if (IS_ERR(request)) 391 if (IS_ERR(request))
373 return PTR_ERR(request); 392 return request;
374 data = request->cpdata; 393 data = request->cpdata;
375 memset(data,0,72); 394 memset(data,0,72);
376 395
@@ -383,9 +402,28 @@ static int tape_3592_disable_crypt(struct tape_device *device)
383 tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data); 402 tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data);
384 tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36); 403 tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36);
385 404
405 return request;
406}
407
408static int tape_3592_disable_crypt(struct tape_device *device)
409{
410 struct tape_request *request;
411
412 request = __tape_3592_disable_crypt(device);
413 if (IS_ERR(request))
414 return PTR_ERR(request);
386 return tape_do_io_free(device, request); 415 return tape_do_io_free(device, request);
387} 416}
388 417
418static void tape_3592_disable_crypt_async(struct tape_device *device)
419{
420 struct tape_request *request;
421
422 request = __tape_3592_disable_crypt(device);
423 if (!IS_ERR(request))
424 tape_do_io_async_free(device, request);
425}
426
389/* 427/*
390 * IOCTL: Set encryption status 428 * IOCTL: Set encryption status
391 */ 429 */
@@ -457,8 +495,7 @@ tape_3590_ioctl(struct tape_device *device, unsigned int cmd, unsigned long arg)
457/* 495/*
458 * SENSE Medium: Get Sense data about medium state 496 * SENSE Medium: Get Sense data about medium state
459 */ 497 */
460static int 498static int tape_3590_sense_medium(struct tape_device *device)
461tape_3590_sense_medium(struct tape_device *device)
462{ 499{
463 struct tape_request *request; 500 struct tape_request *request;
464 501
@@ -470,6 +507,18 @@ tape_3590_sense_medium(struct tape_device *device)
470 return tape_do_io_free(device, request); 507 return tape_do_io_free(device, request);
471} 508}
472 509
510static void tape_3590_sense_medium_async(struct tape_device *device)
511{
512 struct tape_request *request;
513
514 request = tape_alloc_request(1, 128);
515 if (IS_ERR(request))
516 return;
517 request->op = TO_MSEN;
518 tape_ccw_end(request->cpaddr, MEDIUM_SENSE, 128, request->cpdata);
519 tape_do_io_async_free(device, request);
520}
521
473/* 522/*
474 * MTTELL: Tell block. Return the number of block relative to current file. 523 * MTTELL: Tell block. Return the number of block relative to current file.
475 */ 524 */
@@ -546,15 +595,14 @@ tape_3590_read_opposite(struct tape_device *device,
546 * 2. The attention msg is written to the "read subsystem data" buffer. 595 * 2. The attention msg is written to the "read subsystem data" buffer.
547 * In this case we probably should print it to the console. 596 * In this case we probably should print it to the console.
548 */ 597 */
549static int 598static void tape_3590_read_attmsg_async(struct tape_device *device)
550tape_3590_read_attmsg(struct tape_device *device)
551{ 599{
552 struct tape_request *request; 600 struct tape_request *request;
553 char *buf; 601 char *buf;
554 602
555 request = tape_alloc_request(3, 4096); 603 request = tape_alloc_request(3, 4096);
556 if (IS_ERR(request)) 604 if (IS_ERR(request))
557 return PTR_ERR(request); 605 return;
558 request->op = TO_READ_ATTMSG; 606 request->op = TO_READ_ATTMSG;
559 buf = request->cpdata; 607 buf = request->cpdata;
560 buf[0] = PREP_RD_SS_DATA; 608 buf[0] = PREP_RD_SS_DATA;
@@ -562,12 +610,15 @@ tape_3590_read_attmsg(struct tape_device *device)
562 tape_ccw_cc(request->cpaddr, PERFORM_SS_FUNC, 12, buf); 610 tape_ccw_cc(request->cpaddr, PERFORM_SS_FUNC, 12, buf);
563 tape_ccw_cc(request->cpaddr + 1, READ_SS_DATA, 4096 - 12, buf + 12); 611 tape_ccw_cc(request->cpaddr + 1, READ_SS_DATA, 4096 - 12, buf + 12);
564 tape_ccw_end(request->cpaddr + 2, NOP, 0, NULL); 612 tape_ccw_end(request->cpaddr + 2, NOP, 0, NULL);
565 return tape_do_io_free(device, request); 613 tape_do_io_async_free(device, request);
566} 614}
567 615
568/* 616/*
569 * These functions are used to schedule follow-up actions from within an 617 * These functions are used to schedule follow-up actions from within an
570 * interrupt context (like unsolicited interrupts). 618 * interrupt context (like unsolicited interrupts).
619 * Note: the work handler is called by the system work queue. The tape
620 * commands started by the handler need to be asynchrounous, otherwise
621 * a deadlock can occur e.g. in case of a deferred cc=1 (see __tape_do_irq).
571 */ 622 */
572struct work_handler_data { 623struct work_handler_data {
573 struct tape_device *device; 624 struct tape_device *device;
@@ -583,16 +634,16 @@ tape_3590_work_handler(struct work_struct *work)
583 634
584 switch (p->op) { 635 switch (p->op) {
585 case TO_MSEN: 636 case TO_MSEN:
586 tape_3590_sense_medium(p->device); 637 tape_3590_sense_medium_async(p->device);
587 break; 638 break;
588 case TO_READ_ATTMSG: 639 case TO_READ_ATTMSG:
589 tape_3590_read_attmsg(p->device); 640 tape_3590_read_attmsg_async(p->device);
590 break; 641 break;
591 case TO_CRYPT_ON: 642 case TO_CRYPT_ON:
592 tape_3592_enable_crypt(p->device); 643 tape_3592_enable_crypt_async(p->device);
593 break; 644 break;
594 case TO_CRYPT_OFF: 645 case TO_CRYPT_OFF:
595 tape_3592_disable_crypt(p->device); 646 tape_3592_disable_crypt_async(p->device);
596 break; 647 break;
597 default: 648 default:
598 DBF_EVENT(3, "T3590: work handler undefined for " 649 DBF_EVENT(3, "T3590: work handler undefined for "
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9045c52abd25..fb2bb35c62cb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q)
443 &sdev->request_queue->queue_flags); 443 &sdev->request_queue->queue_flags);
444 if (flagset) 444 if (flagset)
445 queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); 445 queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
446 __blk_run_queue(sdev->request_queue); 446 __blk_run_queue(sdev->request_queue, false);
447 if (flagset) 447 if (flagset)
448 queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); 448 queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
449 spin_unlock(sdev->request_queue->queue_lock); 449 spin_unlock(sdev->request_queue->queue_lock);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 998c01be3234..5c3ccfc6b622 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport)
3829 !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); 3829 !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
3830 if (flagset) 3830 if (flagset)
3831 queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); 3831 queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
3832 __blk_run_queue(rport->rqst_q); 3832 __blk_run_queue(rport->rqst_q, false);
3833 if (flagset) 3833 if (flagset)
3834 queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); 3834 queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
3835 spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); 3835 spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 158cecbec718..4a109835e420 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -282,6 +282,9 @@ int core_tmr_lun_reset(
282 282
283 atomic_set(&task->task_active, 0); 283 atomic_set(&task->task_active, 0);
284 atomic_set(&task->task_stop, 0); 284 atomic_set(&task->task_stop, 0);
285 } else {
286 if (atomic_read(&task->task_execute_queue) != 0)
287 transport_remove_task_from_execute_queue(task, dev);
285 } 288 }
286 __transport_stop_task_timer(task, &flags); 289 __transport_stop_task_timer(task, &flags);
287 290
@@ -301,6 +304,7 @@ int core_tmr_lun_reset(
301 DEBUG_LR("LUN_RESET: got t_transport_active = 1 for" 304 DEBUG_LR("LUN_RESET: got t_transport_active = 1 for"
302 " task: %p, t_fe_count: %d dev: %p\n", task, 305 " task: %p, t_fe_count: %d dev: %p\n", task,
303 fe_count, dev); 306 fe_count, dev);
307 atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
304 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, 308 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock,
305 flags); 309 flags);
306 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); 310 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
@@ -310,6 +314,7 @@ int core_tmr_lun_reset(
310 } 314 }
311 DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p," 315 DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p,"
312 " t_fe_count: %d dev: %p\n", task, fe_count, dev); 316 " t_fe_count: %d dev: %p\n", task, fe_count, dev);
317 atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
313 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags); 318 spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags);
314 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count); 319 core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
315 320
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 236e22d8cfae..4bbf6c147f89 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1207,7 +1207,7 @@ transport_get_task_from_execute_queue(struct se_device *dev)
1207 * 1207 *
1208 * 1208 *
1209 */ 1209 */
1210static void transport_remove_task_from_execute_queue( 1210void transport_remove_task_from_execute_queue(
1211 struct se_task *task, 1211 struct se_task *task,
1212 struct se_device *dev) 1212 struct se_device *dev)
1213{ 1213{
@@ -5549,7 +5549,8 @@ static void transport_generic_wait_for_tasks(
5549 5549
5550 atomic_set(&T_TASK(cmd)->transport_lun_stop, 0); 5550 atomic_set(&T_TASK(cmd)->transport_lun_stop, 0);
5551 } 5551 }
5552 if (!atomic_read(&T_TASK(cmd)->t_transport_active)) 5552 if (!atomic_read(&T_TASK(cmd)->t_transport_active) ||
5553 atomic_read(&T_TASK(cmd)->t_transport_aborted))
5553 goto remove; 5554 goto remove;
5554 5555
5555 atomic_set(&T_TASK(cmd)->t_transport_stop, 1); 5556 atomic_set(&T_TASK(cmd)->t_transport_stop, 1);
@@ -5956,6 +5957,9 @@ static void transport_processing_shutdown(struct se_device *dev)
5956 5957
5957 atomic_set(&task->task_active, 0); 5958 atomic_set(&task->task_active, 0);
5958 atomic_set(&task->task_stop, 0); 5959 atomic_set(&task->task_stop, 0);
5960 } else {
5961 if (atomic_read(&task->task_execute_queue) != 0)
5962 transport_remove_task_from_execute_queue(task, dev);
5959 } 5963 }
5960 __transport_stop_task_timer(task, &flags); 5964 __transport_stop_task_timer(task, &flags);
5961 5965
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index f7a5dba3ca23..bf7c687519ef 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -4,7 +4,6 @@
4 4
5menuconfig THERMAL 5menuconfig THERMAL
6 tristate "Generic Thermal sysfs driver" 6 tristate "Generic Thermal sysfs driver"
7 depends on NET
8 help 7 help
9 Generic Thermal Sysfs driver offers a generic mechanism for 8 Generic Thermal Sysfs driver offers a generic mechanism for
10 thermal management. Usually it's made up of one or more thermal 9 thermal management. Usually it's made up of one or more thermal
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 7d0e63c79280..713b7ea4a607 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -62,20 +62,6 @@ static DEFINE_MUTEX(thermal_list_lock);
62 62
63static unsigned int thermal_event_seqnum; 63static unsigned int thermal_event_seqnum;
64 64
65static struct genl_family thermal_event_genl_family = {
66 .id = GENL_ID_GENERATE,
67 .name = THERMAL_GENL_FAMILY_NAME,
68 .version = THERMAL_GENL_VERSION,
69 .maxattr = THERMAL_GENL_ATTR_MAX,
70};
71
72static struct genl_multicast_group thermal_event_mcgrp = {
73 .name = THERMAL_GENL_MCAST_GROUP_NAME,
74};
75
76static int genetlink_init(void);
77static void genetlink_exit(void);
78
79static int get_idr(struct idr *idr, struct mutex *lock, int *id) 65static int get_idr(struct idr *idr, struct mutex *lock, int *id)
80{ 66{
81 int err; 67 int err;
@@ -1225,6 +1211,18 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
1225 1211
1226EXPORT_SYMBOL(thermal_zone_device_unregister); 1212EXPORT_SYMBOL(thermal_zone_device_unregister);
1227 1213
1214#ifdef CONFIG_NET
1215static struct genl_family thermal_event_genl_family = {
1216 .id = GENL_ID_GENERATE,
1217 .name = THERMAL_GENL_FAMILY_NAME,
1218 .version = THERMAL_GENL_VERSION,
1219 .maxattr = THERMAL_GENL_ATTR_MAX,
1220};
1221
1222static struct genl_multicast_group thermal_event_mcgrp = {
1223 .name = THERMAL_GENL_MCAST_GROUP_NAME,
1224};
1225
1228int generate_netlink_event(u32 orig, enum events event) 1226int generate_netlink_event(u32 orig, enum events event)
1229{ 1227{
1230 struct sk_buff *skb; 1228 struct sk_buff *skb;
@@ -1301,6 +1299,15 @@ static int genetlink_init(void)
1301 return result; 1299 return result;
1302} 1300}
1303 1301
1302static void genetlink_exit(void)
1303{
1304 genl_unregister_family(&thermal_event_genl_family);
1305}
1306#else /* !CONFIG_NET */
1307static inline int genetlink_init(void) { return 0; }
1308static inline void genetlink_exit(void) {}
1309#endif /* !CONFIG_NET */
1310
1304static int __init thermal_init(void) 1311static int __init thermal_init(void)
1305{ 1312{
1306 int result = 0; 1313 int result = 0;
@@ -1316,11 +1323,6 @@ static int __init thermal_init(void)
1316 return result; 1323 return result;
1317} 1324}
1318 1325
1319static void genetlink_exit(void)
1320{
1321 genl_unregister_family(&thermal_event_genl_family);
1322}
1323
1324static void __exit thermal_exit(void) 1326static void __exit thermal_exit(void)
1325{ 1327{
1326 class_unregister(&thermal_class); 1328 class_unregister(&thermal_class);
diff --git a/drivers/tty/serial/serial_cs.c b/drivers/tty/serial/serial_cs.c
index 93760b2ea172..1ef4df9bf7e4 100644
--- a/drivers/tty/serial/serial_cs.c
+++ b/drivers/tty/serial/serial_cs.c
@@ -712,6 +712,7 @@ static struct pcmcia_device_id serial_ids[] = {
712 PCMCIA_PFC_DEVICE_PROD_ID12(1, "Xircom", "CreditCard Ethernet+Modem II", 0x2e3ee845, 0xeca401bf), 712 PCMCIA_PFC_DEVICE_PROD_ID12(1, "Xircom", "CreditCard Ethernet+Modem II", 0x2e3ee845, 0xeca401bf),
713 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0e01), 713 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0e01),
714 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0a05), 714 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0a05),
715 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0b05),
715 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x1101), 716 PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x1101),
716 PCMCIA_MFC_DEVICE_MANF_CARD(0, 0x0104, 0x0070), 717 PCMCIA_MFC_DEVICE_MANF_CARD(0, 0x0104, 0x0070),
717 PCMCIA_MFC_DEVICE_MANF_CARD(1, 0x0101, 0x0562), 718 PCMCIA_MFC_DEVICE_MANF_CARD(1, 0x0101, 0x0562),
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d041c6826e43..0f299b7aad60 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2681,17 +2681,13 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
2681 2681
2682 mutex_lock(&usb_address0_mutex); 2682 mutex_lock(&usb_address0_mutex);
2683 2683
2684 if (!udev->config && oldspeed == USB_SPEED_SUPER) { 2684 /* Reset the device; full speed may morph to high speed */
2685 /* Don't reset USB 3.0 devices during an initial setup */ 2685 /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
2686 usb_set_device_state(udev, USB_STATE_DEFAULT); 2686 retval = hub_port_reset(hub, port1, udev, delay);
2687 } else { 2687 if (retval < 0) /* error or disconnect */
2688 /* Reset the device; full speed may morph to high speed */ 2688 goto fail;
2689 /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */ 2689 /* success, speed is known */
2690 retval = hub_port_reset(hub, port1, udev, delay); 2690
2691 if (retval < 0) /* error or disconnect */
2692 goto fail;
2693 /* success, speed is known */
2694 }
2695 retval = -ENODEV; 2691 retval = -ENODEV;
2696 2692
2697 if (oldspeed != USB_SPEED_UNKNOWN && oldspeed != udev->speed) { 2693 if (oldspeed != USB_SPEED_UNKNOWN && oldspeed != udev->speed) {
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 44c595432d6f..81ce6a8e1d94 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -48,6 +48,10 @@ static const struct usb_device_id usb_quirk_list[] = {
48 { USB_DEVICE(0x04b4, 0x0526), .driver_info = 48 { USB_DEVICE(0x04b4, 0x0526), .driver_info =
49 USB_QUIRK_CONFIG_INTF_STRINGS }, 49 USB_QUIRK_CONFIG_INTF_STRINGS },
50 50
51 /* Samsung Android phone modem - ID conflict with SPH-I500 */
52 { USB_DEVICE(0x04e8, 0x6601), .driver_info =
53 USB_QUIRK_CONFIG_INTF_STRINGS },
54
51 /* Roland SC-8820 */ 55 /* Roland SC-8820 */
52 { USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME }, 56 { USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME },
53 57
@@ -68,6 +72,10 @@ static const struct usb_device_id usb_quirk_list[] = {
68 /* M-Systems Flash Disk Pioneers */ 72 /* M-Systems Flash Disk Pioneers */
69 { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, 73 { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME },
70 74
75 /* Keytouch QWERTY Panel keyboard */
76 { USB_DEVICE(0x0926, 0x3333), .driver_info =
77 USB_QUIRK_CONFIG_INTF_STRINGS },
78
71 /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ 79 /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */
72 { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, 80 { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF },
73 81
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 3c6e1a058745..5e1495097ec3 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -346,14 +346,19 @@ static void pn_rx_complete(struct usb_ep *ep, struct usb_request *req)
346 346
347 if (unlikely(!skb)) 347 if (unlikely(!skb))
348 break; 348 break;
349 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, 0,
350 req->actual);
351 page = NULL;
352 349
353 if (req->actual < req->length) { /* Last fragment */ 350 if (skb->len == 0) { /* First fragment */
354 skb->protocol = htons(ETH_P_PHONET); 351 skb->protocol = htons(ETH_P_PHONET);
355 skb_reset_mac_header(skb); 352 skb_reset_mac_header(skb);
356 pskb_pull(skb, 1); 353 /* Can't use pskb_pull() on page in IRQ */
354 memcpy(skb_put(skb, 1), page_address(page), 1);
355 }
356
357 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
358 skb->len == 0, req->actual);
359 page = NULL;
360
361 if (req->actual < req->length) { /* Last fragment */
357 skb->dev = dev; 362 skb->dev = dev;
358 dev->stats.rx_packets++; 363 dev->stats.rx_packets++;
359 dev->stats.rx_bytes += skb->len; 364 dev->stats.rx_bytes += skb->len;
diff --git a/drivers/usb/host/ehci-xilinx-of.c b/drivers/usb/host/ehci-xilinx-of.c
index e8f4f36fdf0b..a6f21b891f68 100644
--- a/drivers/usb/host/ehci-xilinx-of.c
+++ b/drivers/usb/host/ehci-xilinx-of.c
@@ -29,6 +29,7 @@
29 29
30#include <linux/of.h> 30#include <linux/of.h>
31#include <linux/of_platform.h> 31#include <linux/of_platform.h>
32#include <linux/of_address.h>
32 33
33/** 34/**
34 * ehci_xilinx_of_setup - Initialize the device for ehci_reset() 35 * ehci_xilinx_of_setup - Initialize the device for ehci_reset()
diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c
index fcbf4abbf381..0231814a97a5 100644
--- a/drivers/usb/host/xhci-dbg.c
+++ b/drivers/usb/host/xhci-dbg.c
@@ -169,9 +169,10 @@ static void xhci_print_ports(struct xhci_hcd *xhci)
169 } 169 }
170} 170}
171 171
172void xhci_print_ir_set(struct xhci_hcd *xhci, struct xhci_intr_reg *ir_set, int set_num) 172void xhci_print_ir_set(struct xhci_hcd *xhci, int set_num)
173{ 173{
174 void *addr; 174 struct xhci_intr_reg __iomem *ir_set = &xhci->run_regs->ir_set[set_num];
175 void __iomem *addr;
175 u32 temp; 176 u32 temp;
176 u64 temp_64; 177 u64 temp_64;
177 178
@@ -449,7 +450,7 @@ char *xhci_get_slot_state(struct xhci_hcd *xhci,
449 } 450 }
450} 451}
451 452
452void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx) 453static void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx)
453{ 454{
454 /* Fields are 32 bits wide, DMA addresses are in bytes */ 455 /* Fields are 32 bits wide, DMA addresses are in bytes */
455 int field_size = 32 / 8; 456 int field_size = 32 / 8;
@@ -488,7 +489,7 @@ void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx)
488 dbg_rsvd64(xhci, (u64 *)slot_ctx, dma); 489 dbg_rsvd64(xhci, (u64 *)slot_ctx, dma);
489} 490}
490 491
491void xhci_dbg_ep_ctx(struct xhci_hcd *xhci, 492static void xhci_dbg_ep_ctx(struct xhci_hcd *xhci,
492 struct xhci_container_ctx *ctx, 493 struct xhci_container_ctx *ctx,
493 unsigned int last_ep) 494 unsigned int last_ep)
494{ 495{
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 1d0f45f0e7a6..a9534396e85b 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -307,7 +307,7 @@ struct xhci_ep_ctx *xhci_get_ep_ctx(struct xhci_hcd *xhci,
307 307
308/***************** Streams structures manipulation *************************/ 308/***************** Streams structures manipulation *************************/
309 309
310void xhci_free_stream_ctx(struct xhci_hcd *xhci, 310static void xhci_free_stream_ctx(struct xhci_hcd *xhci,
311 unsigned int num_stream_ctxs, 311 unsigned int num_stream_ctxs,
312 struct xhci_stream_ctx *stream_ctx, dma_addr_t dma) 312 struct xhci_stream_ctx *stream_ctx, dma_addr_t dma)
313{ 313{
@@ -335,7 +335,7 @@ void xhci_free_stream_ctx(struct xhci_hcd *xhci,
335 * The stream context array must be a power of 2, and can be as small as 335 * The stream context array must be a power of 2, and can be as small as
336 * 64 bytes or as large as 1MB. 336 * 64 bytes or as large as 1MB.
337 */ 337 */
338struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci, 338static struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci,
339 unsigned int num_stream_ctxs, dma_addr_t *dma, 339 unsigned int num_stream_ctxs, dma_addr_t *dma,
340 gfp_t mem_flags) 340 gfp_t mem_flags)
341{ 341{
@@ -1900,11 +1900,11 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
1900 val &= DBOFF_MASK; 1900 val &= DBOFF_MASK;
1901 xhci_dbg(xhci, "// Doorbell array is located at offset 0x%x" 1901 xhci_dbg(xhci, "// Doorbell array is located at offset 0x%x"
1902 " from cap regs base addr\n", val); 1902 " from cap regs base addr\n", val);
1903 xhci->dba = (void *) xhci->cap_regs + val; 1903 xhci->dba = (void __iomem *) xhci->cap_regs + val;
1904 xhci_dbg_regs(xhci); 1904 xhci_dbg_regs(xhci);
1905 xhci_print_run_regs(xhci); 1905 xhci_print_run_regs(xhci);
1906 /* Set ir_set to interrupt register set 0 */ 1906 /* Set ir_set to interrupt register set 0 */
1907 xhci->ir_set = (void *) xhci->run_regs->ir_set; 1907 xhci->ir_set = &xhci->run_regs->ir_set[0];
1908 1908
1909 /* 1909 /*
1910 * Event ring setup: Allocate a normal ring, but also setup 1910 * Event ring setup: Allocate a normal ring, but also setup
@@ -1961,7 +1961,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
1961 /* Set the event ring dequeue address */ 1961 /* Set the event ring dequeue address */
1962 xhci_set_hc_event_deq(xhci); 1962 xhci_set_hc_event_deq(xhci);
1963 xhci_dbg(xhci, "Wrote ERST address to ir_set 0.\n"); 1963 xhci_dbg(xhci, "Wrote ERST address to ir_set 0.\n");
1964 xhci_print_ir_set(xhci, xhci->ir_set, 0); 1964 xhci_print_ir_set(xhci, 0);
1965 1965
1966 /* 1966 /*
1967 * XXX: Might need to set the Interrupter Moderation Register to 1967 * XXX: Might need to set the Interrupter Moderation Register to
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 3e8211c1ce5a..3289bf4832c9 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -474,8 +474,11 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
474 state->new_deq_seg = find_trb_seg(cur_td->start_seg, 474 state->new_deq_seg = find_trb_seg(cur_td->start_seg,
475 dev->eps[ep_index].stopped_trb, 475 dev->eps[ep_index].stopped_trb,
476 &state->new_cycle_state); 476 &state->new_cycle_state);
477 if (!state->new_deq_seg) 477 if (!state->new_deq_seg) {
478 BUG(); 478 WARN_ON(1);
479 return;
480 }
481
479 /* Dig out the cycle state saved by the xHC during the stop ep cmd */ 482 /* Dig out the cycle state saved by the xHC during the stop ep cmd */
480 xhci_dbg(xhci, "Finding endpoint context\n"); 483 xhci_dbg(xhci, "Finding endpoint context\n");
481 ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index); 484 ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
@@ -486,8 +489,10 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
486 state->new_deq_seg = find_trb_seg(state->new_deq_seg, 489 state->new_deq_seg = find_trb_seg(state->new_deq_seg,
487 state->new_deq_ptr, 490 state->new_deq_ptr,
488 &state->new_cycle_state); 491 &state->new_cycle_state);
489 if (!state->new_deq_seg) 492 if (!state->new_deq_seg) {
490 BUG(); 493 WARN_ON(1);
494 return;
495 }
491 496
492 trb = &state->new_deq_ptr->generic; 497 trb = &state->new_deq_ptr->generic;
493 if ((trb->field[3] & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK) && 498 if ((trb->field[3] & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK) &&
@@ -2363,12 +2368,13 @@ static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
2363 2368
2364 /* Scatter gather list entries may cross 64KB boundaries */ 2369 /* Scatter gather list entries may cross 64KB boundaries */
2365 running_total = TRB_MAX_BUFF_SIZE - 2370 running_total = TRB_MAX_BUFF_SIZE -
2366 (sg_dma_address(sg) & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2371 (sg_dma_address(sg) & (TRB_MAX_BUFF_SIZE - 1));
2372 running_total &= TRB_MAX_BUFF_SIZE - 1;
2367 if (running_total != 0) 2373 if (running_total != 0)
2368 num_trbs++; 2374 num_trbs++;
2369 2375
2370 /* How many more 64KB chunks to transfer, how many more TRBs? */ 2376 /* How many more 64KB chunks to transfer, how many more TRBs? */
2371 while (running_total < sg_dma_len(sg)) { 2377 while (running_total < sg_dma_len(sg) && running_total < temp) {
2372 num_trbs++; 2378 num_trbs++;
2373 running_total += TRB_MAX_BUFF_SIZE; 2379 running_total += TRB_MAX_BUFF_SIZE;
2374 } 2380 }
@@ -2394,11 +2400,11 @@ static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
2394static void check_trb_math(struct urb *urb, int num_trbs, int running_total) 2400static void check_trb_math(struct urb *urb, int num_trbs, int running_total)
2395{ 2401{
2396 if (num_trbs != 0) 2402 if (num_trbs != 0)
2397 dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated number of " 2403 dev_err(&urb->dev->dev, "%s - ep %#x - Miscalculated number of "
2398 "TRBs, %d left\n", __func__, 2404 "TRBs, %d left\n", __func__,
2399 urb->ep->desc.bEndpointAddress, num_trbs); 2405 urb->ep->desc.bEndpointAddress, num_trbs);
2400 if (running_total != urb->transfer_buffer_length) 2406 if (running_total != urb->transfer_buffer_length)
2401 dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated tx length, " 2407 dev_err(&urb->dev->dev, "%s - ep %#x - Miscalculated tx length, "
2402 "queued %#x (%d), asked for %#x (%d)\n", 2408 "queued %#x (%d), asked for %#x (%d)\n",
2403 __func__, 2409 __func__,
2404 urb->ep->desc.bEndpointAddress, 2410 urb->ep->desc.bEndpointAddress,
@@ -2533,8 +2539,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2533 sg = urb->sg; 2539 sg = urb->sg;
2534 addr = (u64) sg_dma_address(sg); 2540 addr = (u64) sg_dma_address(sg);
2535 this_sg_len = sg_dma_len(sg); 2541 this_sg_len = sg_dma_len(sg);
2536 trb_buff_len = TRB_MAX_BUFF_SIZE - 2542 trb_buff_len = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
2537 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
2538 trb_buff_len = min_t(int, trb_buff_len, this_sg_len); 2543 trb_buff_len = min_t(int, trb_buff_len, this_sg_len);
2539 if (trb_buff_len > urb->transfer_buffer_length) 2544 if (trb_buff_len > urb->transfer_buffer_length)
2540 trb_buff_len = urb->transfer_buffer_length; 2545 trb_buff_len = urb->transfer_buffer_length;
@@ -2572,7 +2577,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2572 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1), 2577 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1),
2573 (unsigned int) addr + trb_buff_len); 2578 (unsigned int) addr + trb_buff_len);
2574 if (TRB_MAX_BUFF_SIZE - 2579 if (TRB_MAX_BUFF_SIZE -
2575 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1)) < trb_buff_len) { 2580 (addr & (TRB_MAX_BUFF_SIZE - 1)) < trb_buff_len) {
2576 xhci_warn(xhci, "WARN: sg dma xfer crosses 64KB boundaries!\n"); 2581 xhci_warn(xhci, "WARN: sg dma xfer crosses 64KB boundaries!\n");
2577 xhci_dbg(xhci, "Next boundary at %#x, end dma = %#x\n", 2582 xhci_dbg(xhci, "Next boundary at %#x, end dma = %#x\n",
2578 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1), 2583 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1),
@@ -2616,7 +2621,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2616 } 2621 }
2617 2622
2618 trb_buff_len = TRB_MAX_BUFF_SIZE - 2623 trb_buff_len = TRB_MAX_BUFF_SIZE -
2619 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2624 (addr & (TRB_MAX_BUFF_SIZE - 1));
2620 trb_buff_len = min_t(int, trb_buff_len, this_sg_len); 2625 trb_buff_len = min_t(int, trb_buff_len, this_sg_len);
2621 if (running_total + trb_buff_len > urb->transfer_buffer_length) 2626 if (running_total + trb_buff_len > urb->transfer_buffer_length)
2622 trb_buff_len = 2627 trb_buff_len =
@@ -2656,7 +2661,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2656 num_trbs = 0; 2661 num_trbs = 0;
2657 /* How much data is (potentially) left before the 64KB boundary? */ 2662 /* How much data is (potentially) left before the 64KB boundary? */
2658 running_total = TRB_MAX_BUFF_SIZE - 2663 running_total = TRB_MAX_BUFF_SIZE -
2659 (urb->transfer_dma & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2664 (urb->transfer_dma & (TRB_MAX_BUFF_SIZE - 1));
2665 running_total &= TRB_MAX_BUFF_SIZE - 1;
2660 2666
2661 /* If there's some data on this 64KB chunk, or we have to send a 2667 /* If there's some data on this 64KB chunk, or we have to send a
2662 * zero-length transfer, we need at least one TRB 2668 * zero-length transfer, we need at least one TRB
@@ -2700,8 +2706,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
2700 /* How much data is in the first TRB? */ 2706 /* How much data is in the first TRB? */
2701 addr = (u64) urb->transfer_dma; 2707 addr = (u64) urb->transfer_dma;
2702 trb_buff_len = TRB_MAX_BUFF_SIZE - 2708 trb_buff_len = TRB_MAX_BUFF_SIZE -
2703 (urb->transfer_dma & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2709 (urb->transfer_dma & (TRB_MAX_BUFF_SIZE - 1));
2704 if (urb->transfer_buffer_length < trb_buff_len) 2710 if (trb_buff_len > urb->transfer_buffer_length)
2705 trb_buff_len = urb->transfer_buffer_length; 2711 trb_buff_len = urb->transfer_buffer_length;
2706 2712
2707 first_trb = true; 2713 first_trb = true;
@@ -2879,8 +2885,8 @@ static int count_isoc_trbs_needed(struct xhci_hcd *xhci,
2879 addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset); 2885 addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset);
2880 td_len = urb->iso_frame_desc[i].length; 2886 td_len = urb->iso_frame_desc[i].length;
2881 2887
2882 running_total = TRB_MAX_BUFF_SIZE - 2888 running_total = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
2883 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1)); 2889 running_total &= TRB_MAX_BUFF_SIZE - 1;
2884 if (running_total != 0) 2890 if (running_total != 0)
2885 num_trbs++; 2891 num_trbs++;
2886 2892
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 34cf4e165877..2083fc2179b2 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -109,7 +109,7 @@ int xhci_halt(struct xhci_hcd *xhci)
109/* 109/*
110 * Set the run bit and wait for the host to be running. 110 * Set the run bit and wait for the host to be running.
111 */ 111 */
112int xhci_start(struct xhci_hcd *xhci) 112static int xhci_start(struct xhci_hcd *xhci)
113{ 113{
114 u32 temp; 114 u32 temp;
115 int ret; 115 int ret;
@@ -329,7 +329,7 @@ int xhci_init(struct usb_hcd *hcd)
329 329
330 330
331#ifdef CONFIG_USB_XHCI_HCD_DEBUGGING 331#ifdef CONFIG_USB_XHCI_HCD_DEBUGGING
332void xhci_event_ring_work(unsigned long arg) 332static void xhci_event_ring_work(unsigned long arg)
333{ 333{
334 unsigned long flags; 334 unsigned long flags;
335 int temp; 335 int temp;
@@ -473,7 +473,7 @@ int xhci_run(struct usb_hcd *hcd)
473 xhci->ir_set, (unsigned int) ER_IRQ_ENABLE(temp)); 473 xhci->ir_set, (unsigned int) ER_IRQ_ENABLE(temp));
474 xhci_writel(xhci, ER_IRQ_ENABLE(temp), 474 xhci_writel(xhci, ER_IRQ_ENABLE(temp),
475 &xhci->ir_set->irq_pending); 475 &xhci->ir_set->irq_pending);
476 xhci_print_ir_set(xhci, xhci->ir_set, 0); 476 xhci_print_ir_set(xhci, 0);
477 477
478 if (NUM_TEST_NOOPS > 0) 478 if (NUM_TEST_NOOPS > 0)
479 doorbell = xhci_setup_one_noop(xhci); 479 doorbell = xhci_setup_one_noop(xhci);
@@ -528,7 +528,7 @@ void xhci_stop(struct usb_hcd *hcd)
528 temp = xhci_readl(xhci, &xhci->ir_set->irq_pending); 528 temp = xhci_readl(xhci, &xhci->ir_set->irq_pending);
529 xhci_writel(xhci, ER_IRQ_DISABLE(temp), 529 xhci_writel(xhci, ER_IRQ_DISABLE(temp),
530 &xhci->ir_set->irq_pending); 530 &xhci->ir_set->irq_pending);
531 xhci_print_ir_set(xhci, xhci->ir_set, 0); 531 xhci_print_ir_set(xhci, 0);
532 532
533 xhci_dbg(xhci, "cleaning up memory\n"); 533 xhci_dbg(xhci, "cleaning up memory\n");
534 xhci_mem_cleanup(xhci); 534 xhci_mem_cleanup(xhci);
@@ -755,7 +755,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
755 temp = xhci_readl(xhci, &xhci->ir_set->irq_pending); 755 temp = xhci_readl(xhci, &xhci->ir_set->irq_pending);
756 xhci_writel(xhci, ER_IRQ_DISABLE(temp), 756 xhci_writel(xhci, ER_IRQ_DISABLE(temp),
757 &xhci->ir_set->irq_pending); 757 &xhci->ir_set->irq_pending);
758 xhci_print_ir_set(xhci, xhci->ir_set, 0); 758 xhci_print_ir_set(xhci, 0);
759 759
760 xhci_dbg(xhci, "cleaning up memory\n"); 760 xhci_dbg(xhci, "cleaning up memory\n");
761 xhci_mem_cleanup(xhci); 761 xhci_mem_cleanup(xhci);
@@ -857,7 +857,7 @@ unsigned int xhci_last_valid_endpoint(u32 added_ctxs)
857/* Returns 1 if the arguments are OK; 857/* Returns 1 if the arguments are OK;
858 * returns 0 this is a root hub; returns -EINVAL for NULL pointers. 858 * returns 0 this is a root hub; returns -EINVAL for NULL pointers.
859 */ 859 */
860int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev, 860static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
861 struct usb_host_endpoint *ep, int check_ep, bool check_virt_dev, 861 struct usb_host_endpoint *ep, int check_ep, bool check_virt_dev,
862 const char *func) { 862 const char *func) {
863 struct xhci_hcd *xhci; 863 struct xhci_hcd *xhci;
@@ -1693,7 +1693,7 @@ static void xhci_setup_input_ctx_for_config_ep(struct xhci_hcd *xhci,
1693 xhci_dbg_ctx(xhci, in_ctx, xhci_last_valid_endpoint(add_flags)); 1693 xhci_dbg_ctx(xhci, in_ctx, xhci_last_valid_endpoint(add_flags));
1694} 1694}
1695 1695
1696void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci, 1696static void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
1697 unsigned int slot_id, unsigned int ep_index, 1697 unsigned int slot_id, unsigned int ep_index,
1698 struct xhci_dequeue_state *deq_state) 1698 struct xhci_dequeue_state *deq_state)
1699{ 1699{
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 7f236fd22015..7f127df6dd55 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1348,7 +1348,7 @@ static inline int xhci_link_trb_quirk(struct xhci_hcd *xhci)
1348} 1348}
1349 1349
1350/* xHCI debugging */ 1350/* xHCI debugging */
1351void xhci_print_ir_set(struct xhci_hcd *xhci, struct xhci_intr_reg *ir_set, int set_num); 1351void xhci_print_ir_set(struct xhci_hcd *xhci, int set_num);
1352void xhci_print_registers(struct xhci_hcd *xhci); 1352void xhci_print_registers(struct xhci_hcd *xhci);
1353void xhci_dbg_regs(struct xhci_hcd *xhci); 1353void xhci_dbg_regs(struct xhci_hcd *xhci);
1354void xhci_print_run_regs(struct xhci_hcd *xhci); 1354void xhci_print_run_regs(struct xhci_hcd *xhci);
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 54a8bd1047d6..c292d5c499e7 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1864,6 +1864,7 @@ allocate_instance(struct device *dev,
1864 INIT_LIST_HEAD(&musb->out_bulk); 1864 INIT_LIST_HEAD(&musb->out_bulk);
1865 1865
1866 hcd->uses_new_polling = 1; 1866 hcd->uses_new_polling = 1;
1867 hcd->has_tt = 1;
1867 1868
1868 musb->vbuserr_retry = VBUSERR_RETRY_COUNT; 1869 musb->vbuserr_retry = VBUSERR_RETRY_COUNT;
1869 musb->a_wait_bcon = OTG_TIME_A_WAIT_BCON; 1870 musb->a_wait_bcon = OTG_TIME_A_WAIT_BCON;
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index d74a8113ae74..e6400be8a0f8 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -488,6 +488,15 @@ struct musb {
488 unsigned set_address:1; 488 unsigned set_address:1;
489 unsigned test_mode:1; 489 unsigned test_mode:1;
490 unsigned softconnect:1; 490 unsigned softconnect:1;
491
492 u8 address;
493 u8 test_mode_nr;
494 u16 ackpend; /* ep0 */
495 enum musb_g_ep0_state ep0_state;
496 struct usb_gadget g; /* the gadget */
497 struct usb_gadget_driver *gadget_driver; /* its driver */
498#endif
499
491 /* 500 /*
492 * FIXME: Remove this flag. 501 * FIXME: Remove this flag.
493 * 502 *
@@ -501,14 +510,6 @@ struct musb {
501 */ 510 */
502 unsigned double_buffer_not_ok:1 __deprecated; 511 unsigned double_buffer_not_ok:1 __deprecated;
503 512
504 u8 address;
505 u8 test_mode_nr;
506 u16 ackpend; /* ep0 */
507 enum musb_g_ep0_state ep0_state;
508 struct usb_gadget g; /* the gadget */
509 struct usb_gadget_driver *gadget_driver; /* its driver */
510#endif
511
512 struct musb_hdrc_config *config; 513 struct musb_hdrc_config *config;
513 514
514#ifdef MUSB_CONFIG_PROC_FS 515#ifdef MUSB_CONFIG_PROC_FS
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index a3f12333fc41..bc8badd16897 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -362,6 +362,7 @@ static int omap2430_musb_init(struct musb *musb)
362 362
363static int omap2430_musb_exit(struct musb *musb) 363static int omap2430_musb_exit(struct musb *musb)
364{ 364{
365 del_timer_sync(&musb_idle_timer);
365 366
366 omap2430_low_level_exit(musb); 367 omap2430_low_level_exit(musb);
367 otg_put_transceiver(musb->xceiv); 368 otg_put_transceiver(musb->xceiv);
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 7481ff8a49e4..0457813eebee 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -301,6 +301,9 @@ static const struct usb_device_id id_table[] = {
301 { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */ 301 { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */
302 .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist 302 .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
303 }, 303 },
304 { USB_DEVICE(0x0f3d, 0x68A3), /* Airprime/Sierra Wireless Direct IP modems */
305 .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
306 },
304 { USB_DEVICE(0x413C, 0x08133) }, /* Dell Computer Corp. Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port */ 307 { USB_DEVICE(0x413C, 0x08133) }, /* Dell Computer Corp. Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port */
305 308
306 { } 309 { }
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index b004b2a485c3..9c014e2ecd68 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -295,12 +295,15 @@ static void usb_wwan_indat_callback(struct urb *urb)
295 __func__, status, endpoint); 295 __func__, status, endpoint);
296 } else { 296 } else {
297 tty = tty_port_tty_get(&port->port); 297 tty = tty_port_tty_get(&port->port);
298 if (urb->actual_length) { 298 if (tty) {
299 tty_insert_flip_string(tty, data, urb->actual_length); 299 if (urb->actual_length) {
300 tty_flip_buffer_push(tty); 300 tty_insert_flip_string(tty, data,
301 } else 301 urb->actual_length);
302 dbg("%s: empty read urb received", __func__); 302 tty_flip_buffer_push(tty);
303 tty_kref_put(tty); 303 } else
304 dbg("%s: empty read urb received", __func__);
305 tty_kref_put(tty);
306 }
304 307
305 /* Resubmit urb so we continue receiving */ 308 /* Resubmit urb so we continue receiving */
306 if (status != -ESHUTDOWN) { 309 if (status != -ESHUTDOWN) {
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 15a5d89b7f39..1c11959a7d58 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -27,6 +27,7 @@
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/usb.h> 28#include <linux/usb.h>
29#include <linux/usb/serial.h> 29#include <linux/usb/serial.h>
30#include <linux/usb/cdc.h>
30#include "visor.h" 31#include "visor.h"
31 32
32/* 33/*
@@ -479,6 +480,17 @@ static int visor_probe(struct usb_serial *serial,
479 480
480 dbg("%s", __func__); 481 dbg("%s", __func__);
481 482
483 /*
484 * some Samsung Android phones in modem mode have the same ID
485 * as SPH-I500, but they are ACM devices, so dont bind to them
486 */
487 if (id->idVendor == SAMSUNG_VENDOR_ID &&
488 id->idProduct == SAMSUNG_SPH_I500_ID &&
489 serial->dev->descriptor.bDeviceClass == USB_CLASS_COMM &&
490 serial->dev->descriptor.bDeviceSubClass ==
491 USB_CDC_SUBCLASS_ACM)
492 return -ENODEV;
493
482 if (serial->dev->actconfig->desc.bConfigurationValue != 1) { 494 if (serial->dev->actconfig->desc.bConfigurationValue != 1) {
483 dev_err(&serial->dev->dev, "active config #%d != 1 ??\n", 495 dev_err(&serial->dev->dev, "active config #%d != 1 ??\n",
484 serial->dev->actconfig->desc.bConfigurationValue); 496 serial->dev->actconfig->desc.bConfigurationValue);
diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c
index 8010aaeb5adb..dd0e84a9bd2f 100644
--- a/drivers/video/backlight/ltv350qv.c
+++ b/drivers/video/backlight/ltv350qv.c
@@ -239,11 +239,15 @@ static int __devinit ltv350qv_probe(struct spi_device *spi)
239 lcd->spi = spi; 239 lcd->spi = spi;
240 lcd->power = FB_BLANK_POWERDOWN; 240 lcd->power = FB_BLANK_POWERDOWN;
241 lcd->buffer = kzalloc(8, GFP_KERNEL); 241 lcd->buffer = kzalloc(8, GFP_KERNEL);
242 if (!lcd->buffer) {
243 ret = -ENOMEM;
244 goto out_free_lcd;
245 }
242 246
243 ld = lcd_device_register("ltv350qv", &spi->dev, lcd, &ltv_ops); 247 ld = lcd_device_register("ltv350qv", &spi->dev, lcd, &ltv_ops);
244 if (IS_ERR(ld)) { 248 if (IS_ERR(ld)) {
245 ret = PTR_ERR(ld); 249 ret = PTR_ERR(ld);
246 goto out_free_lcd; 250 goto out_free_buffer;
247 } 251 }
248 lcd->ld = ld; 252 lcd->ld = ld;
249 253
@@ -257,6 +261,8 @@ static int __devinit ltv350qv_probe(struct spi_device *spi)
257 261
258out_unregister: 262out_unregister:
259 lcd_device_unregister(ld); 263 lcd_device_unregister(ld);
264out_free_buffer:
265 kfree(lcd->buffer);
260out_free_lcd: 266out_free_lcd:
261 kfree(lcd); 267 kfree(lcd);
262 return ret; 268 return ret;
@@ -268,6 +274,7 @@ static int __devexit ltv350qv_remove(struct spi_device *spi)
268 274
269 ltv350qv_power(lcd, FB_BLANK_POWERDOWN); 275 ltv350qv_power(lcd, FB_BLANK_POWERDOWN);
270 lcd_device_unregister(lcd->ld); 276 lcd_device_unregister(lcd->ld);
277 kfree(lcd->buffer);
271 kfree(lcd); 278 kfree(lcd);
272 279
273 return 0; 280 return 0;
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index eca855a55c0d..3de4ba0260a5 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -646,7 +646,7 @@ static int __devexit cpwd_remove(struct platform_device *op)
646 struct cpwd *p = dev_get_drvdata(&op->dev); 646 struct cpwd *p = dev_get_drvdata(&op->dev);
647 int i; 647 int i;
648 648
649 for (i = 0; i < 4; i++) { 649 for (i = 0; i < WD_NUMDEVS; i++) {
650 misc_deregister(&p->devs[i].misc); 650 misc_deregister(&p->devs[i].misc);
651 651
652 if (!p->enabled) { 652 if (!p->enabled) {
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 24b966d5061a..204a5603c4ae 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -710,7 +710,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
710 return 0; 710 return 0;
711} 711}
712 712
713static void __devexit hpwdt_exit_nmi_decoding(void) 713static void hpwdt_exit_nmi_decoding(void)
714{ 714{
715 unregister_die_notifier(&die_notifier); 715 unregister_die_notifier(&die_notifier);
716 if (cru_rom_addr) 716 if (cru_rom_addr)
@@ -726,7 +726,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
726 return 0; 726 return 0;
727} 727}
728 728
729static void __devexit hpwdt_exit_nmi_decoding(void) 729static void hpwdt_exit_nmi_decoding(void)
730{ 730{
731} 731}
732#endif /* CONFIG_HPWDT_NMI_DECODING */ 732#endif /* CONFIG_HPWDT_NMI_DECODING */
diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c
index c7d67e9a7465..79906255eeb6 100644
--- a/drivers/watchdog/sbc_fitpc2_wdt.c
+++ b/drivers/watchdog/sbc_fitpc2_wdt.c
@@ -201,11 +201,14 @@ static struct miscdevice fitpc2_wdt_miscdev = {
201static int __init fitpc2_wdt_init(void) 201static int __init fitpc2_wdt_init(void)
202{ 202{
203 int err; 203 int err;
204 const char *brd_name;
204 205
205 if (!strstr(dmi_get_system_info(DMI_BOARD_NAME), "SBC-FITPC2")) 206 brd_name = dmi_get_system_info(DMI_BOARD_NAME);
207
208 if (!brd_name || !strstr(brd_name, "SBC-FITPC2"))
206 return -ENODEV; 209 return -ENODEV;
207 210
208 pr_info("%s found\n", dmi_get_system_info(DMI_BOARD_NAME)); 211 pr_info("%s found\n", brd_name);
209 212
210 if (!request_region(COMMAND_PORT, 1, WATCHDOG_NAME)) { 213 if (!request_region(COMMAND_PORT, 1, WATCHDOG_NAME)) {
211 pr_err("I/O address 0x%04x already in use\n", COMMAND_PORT); 214 pr_err("I/O address 0x%04x already in use\n", COMMAND_PORT);
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index 0461858e07d0..b61ab1c54293 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -508,7 +508,7 @@ static int __init sch311x_detect(int sio_config_port, unsigned short *addr)
508 sch311x_sio_outb(sio_config_port, 0x07, 0x0a); 508 sch311x_sio_outb(sio_config_port, 0x07, 0x0a);
509 509
510 /* Check if Logical Device Register is currently active */ 510 /* Check if Logical Device Register is currently active */
511 if (sch311x_sio_inb(sio_config_port, 0x30) && 0x01 == 0) 511 if ((sch311x_sio_inb(sio_config_port, 0x30) & 0x01) == 0)
512 printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n"); 512 printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n");
513 513
514 /* Get the base address of the runtime registers */ 514 /* Get the base address of the runtime registers */
diff --git a/drivers/watchdog/w83697ug_wdt.c b/drivers/watchdog/w83697ug_wdt.c
index a6c12dec91a1..df2a64dc9672 100644
--- a/drivers/watchdog/w83697ug_wdt.c
+++ b/drivers/watchdog/w83697ug_wdt.c
@@ -109,7 +109,7 @@ static int w83697ug_select_wd_register(void)
109 outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */ 109 outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */
110 outb_p(0x30, WDT_EFER); /* select CR30 */ 110 outb_p(0x30, WDT_EFER); /* select CR30 */
111 c = inb_p(WDT_EFDR); 111 c = inb_p(WDT_EFDR);
112 outb_p(c || 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */ 112 outb_p(c | 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */
113 113
114 return 0; 114 return 0;
115} 115}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 43f9f02c7db0..718050ace08f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -232,7 +232,7 @@ static int increase_reservation(unsigned long nr_pages)
232 set_phys_to_machine(pfn, frame_list[i]); 232 set_phys_to_machine(pfn, frame_list[i]);
233 233
234 /* Link back into the page tables if not highmem. */ 234 /* Link back into the page tables if not highmem. */
235 if (pfn < max_low_pfn) { 235 if (!xen_hvm_domain() && pfn < max_low_pfn) {
236 int ret; 236 int ret;
237 ret = HYPERVISOR_update_va_mapping( 237 ret = HYPERVISOR_update_va_mapping(
238 (unsigned long)__va(pfn << PAGE_SHIFT), 238 (unsigned long)__va(pfn << PAGE_SHIFT),
@@ -280,7 +280,7 @@ static int decrease_reservation(unsigned long nr_pages)
280 280
281 scrub_page(page); 281 scrub_page(page);
282 282
283 if (!PageHighMem(page)) { 283 if (!xen_hvm_domain() && !PageHighMem(page)) {
284 ret = HYPERVISOR_update_va_mapping( 284 ret = HYPERVISOR_update_va_mapping(
285 (unsigned long)__va(pfn << PAGE_SHIFT), 285 (unsigned long)__va(pfn << PAGE_SHIFT),
286 __pte_ma(0), 0); 286 __pte_ma(0), 0);
@@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages)
296 /* No more mappings: invalidate P2M and add to balloon. */ 296 /* No more mappings: invalidate P2M and add to balloon. */
297 for (i = 0; i < nr_pages; i++) { 297 for (i = 0; i < nr_pages; i++) {
298 pfn = mfn_to_pfn(frame_list[i]); 298 pfn = mfn_to_pfn(frame_list[i]);
299 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 299 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
300 balloon_append(pfn_to_page(pfn)); 300 balloon_append(pfn_to_page(pfn));
301 } 301 }
302 302
@@ -392,15 +392,19 @@ static struct notifier_block xenstore_notifier;
392 392
393static int __init balloon_init(void) 393static int __init balloon_init(void)
394{ 394{
395 unsigned long pfn, extra_pfn_end; 395 unsigned long pfn, nr_pages, extra_pfn_end;
396 struct page *page; 396 struct page *page;
397 397
398 if (!xen_pv_domain()) 398 if (!xen_domain())
399 return -ENODEV; 399 return -ENODEV;
400 400
401 pr_info("xen_balloon: Initialising balloon driver.\n"); 401 pr_info("xen_balloon: Initialising balloon driver.\n");
402 402
403 balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); 403 if (xen_pv_domain())
404 nr_pages = xen_start_info->nr_pages;
405 else
406 nr_pages = max_pfn;
407 balloon_stats.current_pages = min(nr_pages, max_pfn);
404 balloon_stats.target_pages = balloon_stats.current_pages; 408 balloon_stats.target_pages = balloon_stats.current_pages;
405 balloon_stats.balloon_low = 0; 409 balloon_stats.balloon_low = 0;
406 balloon_stats.balloon_high = 0; 410 balloon_stats.balloon_high = 0;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 74681478100a..0ad1699a1b3e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -114,7 +114,7 @@ struct cpu_evtchn_s {
114static __initdata struct cpu_evtchn_s init_evtchn_mask = { 114static __initdata struct cpu_evtchn_s init_evtchn_mask = {
115 .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul, 115 .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
116}; 116};
117static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask; 117static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;
118 118
119static inline unsigned long *cpu_evtchn_mask(int cpu) 119static inline unsigned long *cpu_evtchn_mask(int cpu)
120{ 120{
@@ -277,7 +277,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
277 277
278 BUG_ON(irq == -1); 278 BUG_ON(irq == -1);
279#ifdef CONFIG_SMP 279#ifdef CONFIG_SMP
280 cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); 280 cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
281#endif 281#endif
282 282
283 clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); 283 clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
@@ -294,7 +294,7 @@ static void init_evtchn_cpu_bindings(void)
294 294
295 /* By default all event channels notify CPU#0. */ 295 /* By default all event channels notify CPU#0. */
296 for_each_irq_desc(i, desc) { 296 for_each_irq_desc(i, desc) {
297 cpumask_copy(desc->affinity, cpumask_of(0)); 297 cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
298 } 298 }
299#endif 299#endif
300 300
@@ -376,81 +376,69 @@ static void unmask_evtchn(int port)
376 put_cpu(); 376 put_cpu();
377} 377}
378 378
379static int get_nr_hw_irqs(void) 379static int xen_allocate_irq_dynamic(void)
380{ 380{
381 int ret = 1; 381 int first = 0;
382 int irq;
382 383
383#ifdef CONFIG_X86_IO_APIC 384#ifdef CONFIG_X86_IO_APIC
384 ret = get_nr_irqs_gsi(); 385 /*
386 * For an HVM guest or domain 0 which see "real" (emulated or
387 * actual repectively) GSIs we allocate dynamic IRQs
388 * e.g. those corresponding to event channels or MSIs
389 * etc. from the range above those "real" GSIs to avoid
390 * collisions.
391 */
392 if (xen_initial_domain() || xen_hvm_domain())
393 first = get_nr_irqs_gsi();
385#endif 394#endif
386 395
387 return ret; 396retry:
388} 397 irq = irq_alloc_desc_from(first, -1);
389 398
390static int find_unbound_pirq(int type) 399 if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
391{ 400 printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
392 int rc, i; 401 first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
393 struct physdev_get_free_pirq op_get_free_pirq; 402 goto retry;
394 op_get_free_pirq.type = type; 403 }
395 404
396 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq); 405 if (irq < 0)
397 if (!rc) 406 panic("No available IRQ to bind to: increase nr_irqs!\n");
398 return op_get_free_pirq.pirq;
399 407
400 for (i = 0; i < nr_irqs; i++) { 408 return irq;
401 if (pirq_to_irq[i] < 0)
402 return i;
403 }
404 return -1;
405} 409}
406 410
407static int find_unbound_irq(void) 411static int xen_allocate_irq_gsi(unsigned gsi)
408{ 412{
409 struct irq_data *data; 413 int irq;
410 int irq, res;
411 int bottom = get_nr_hw_irqs();
412 int top = nr_irqs-1;
413
414 if (bottom == nr_irqs)
415 goto no_irqs;
416 414
417 /* This loop starts from the top of IRQ space and goes down. 415 /*
418 * We need this b/c if we have a PCI device in a Xen PV guest 416 * A PV guest has no concept of a GSI (since it has no ACPI
419 * we do not have an IO-APIC (though the backend might have them) 417 * nor access to/knowledge of the physical APICs). Therefore
420 * mapped in. To not have a collision of physical IRQs with the Xen 418 * all IRQs are dynamically allocated from the entire IRQ
421 * event channels start at the top of the IRQ space for virtual IRQs. 419 * space.
422 */ 420 */
423 for (irq = top; irq > bottom; irq--) { 421 if (xen_pv_domain() && !xen_initial_domain())
424 data = irq_get_irq_data(irq); 422 return xen_allocate_irq_dynamic();
425 /* only 15->0 have init'd desc; handle irq > 16 */
426 if (!data)
427 break;
428 if (data->chip == &no_irq_chip)
429 break;
430 if (data->chip != &xen_dynamic_chip)
431 continue;
432 if (irq_info[irq].type == IRQT_UNBOUND)
433 return irq;
434 }
435
436 if (irq == bottom)
437 goto no_irqs;
438 423
439 res = irq_alloc_desc_at(irq, -1); 424 /* Legacy IRQ descriptors are already allocated by the arch. */
425 if (gsi < NR_IRQS_LEGACY)
426 return gsi;
440 427
441 if (WARN_ON(res != irq)) 428 irq = irq_alloc_desc_at(gsi, -1);
442 return -1; 429 if (irq < 0)
430 panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
443 431
444 return irq; 432 return irq;
445
446no_irqs:
447 panic("No available IRQ to bind to: increase nr_irqs!\n");
448} 433}
449 434
450static bool identity_mapped_irq(unsigned irq) 435static void xen_free_irq(unsigned irq)
451{ 436{
452 /* identity map all the hardware irqs */ 437 /* Legacy IRQ descriptors are managed by the arch. */
453 return irq < get_nr_hw_irqs(); 438 if (irq < NR_IRQS_LEGACY)
439 return;
440
441 irq_free_desc(irq);
454} 442}
455 443
456static void pirq_unmask_notify(int irq) 444static void pirq_unmask_notify(int irq)
@@ -486,7 +474,7 @@ static bool probing_irq(int irq)
486 return desc && desc->action == NULL; 474 return desc && desc->action == NULL;
487} 475}
488 476
489static unsigned int startup_pirq(unsigned int irq) 477static unsigned int __startup_pirq(unsigned int irq)
490{ 478{
491 struct evtchn_bind_pirq bind_pirq; 479 struct evtchn_bind_pirq bind_pirq;
492 struct irq_info *info = info_for_irq(irq); 480 struct irq_info *info = info_for_irq(irq);
@@ -524,9 +512,15 @@ out:
524 return 0; 512 return 0;
525} 513}
526 514
527static void shutdown_pirq(unsigned int irq) 515static unsigned int startup_pirq(struct irq_data *data)
516{
517 return __startup_pirq(data->irq);
518}
519
520static void shutdown_pirq(struct irq_data *data)
528{ 521{
529 struct evtchn_close close; 522 struct evtchn_close close;
523 unsigned int irq = data->irq;
530 struct irq_info *info = info_for_irq(irq); 524 struct irq_info *info = info_for_irq(irq);
531 int evtchn = evtchn_from_irq(irq); 525 int evtchn = evtchn_from_irq(irq);
532 526
@@ -546,20 +540,20 @@ static void shutdown_pirq(unsigned int irq)
546 info->evtchn = 0; 540 info->evtchn = 0;
547} 541}
548 542
549static void enable_pirq(unsigned int irq) 543static void enable_pirq(struct irq_data *data)
550{ 544{
551 startup_pirq(irq); 545 startup_pirq(data);
552} 546}
553 547
554static void disable_pirq(unsigned int irq) 548static void disable_pirq(struct irq_data *data)
555{ 549{
556} 550}
557 551
558static void ack_pirq(unsigned int irq) 552static void ack_pirq(struct irq_data *data)
559{ 553{
560 int evtchn = evtchn_from_irq(irq); 554 int evtchn = evtchn_from_irq(data->irq);
561 555
562 move_native_irq(irq); 556 move_native_irq(data->irq);
563 557
564 if (VALID_EVTCHN(evtchn)) { 558 if (VALID_EVTCHN(evtchn)) {
565 mask_evtchn(evtchn); 559 mask_evtchn(evtchn);
@@ -567,23 +561,6 @@ static void ack_pirq(unsigned int irq)
567 } 561 }
568} 562}
569 563
570static void end_pirq(unsigned int irq)
571{
572 int evtchn = evtchn_from_irq(irq);
573 struct irq_desc *desc = irq_to_desc(irq);
574
575 if (WARN_ON(!desc))
576 return;
577
578 if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
579 (IRQ_DISABLED|IRQ_PENDING)) {
580 shutdown_pirq(irq);
581 } else if (VALID_EVTCHN(evtchn)) {
582 unmask_evtchn(evtchn);
583 pirq_unmask_notify(irq);
584 }
585}
586
587static int find_irq_by_gsi(unsigned gsi) 564static int find_irq_by_gsi(unsigned gsi)
588{ 565{
589 int irq; 566 int irq;
@@ -638,14 +615,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
638 goto out; /* XXX need refcount? */ 615 goto out; /* XXX need refcount? */
639 } 616 }
640 617
641 /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore 618 irq = xen_allocate_irq_gsi(gsi);
642 * we are using the !xen_initial_domain() to drop in the function.*/
643 if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
644 xen_pv_domain())) {
645 irq = gsi;
646 irq_alloc_desc_at(irq, -1);
647 } else
648 irq = find_unbound_irq();
649 619
650 set_irq_chip_and_handler_name(irq, &xen_pirq_chip, 620 set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
651 handle_level_irq, name); 621 handle_level_irq, name);
@@ -658,7 +628,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
658 * this in the priv domain. */ 628 * this in the priv domain. */
659 if (xen_initial_domain() && 629 if (xen_initial_domain() &&
660 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { 630 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
661 irq_free_desc(irq); 631 xen_free_irq(irq);
662 irq = -ENOSPC; 632 irq = -ENOSPC;
663 goto out; 633 goto out;
664 } 634 }
@@ -674,87 +644,46 @@ out:
674} 644}
675 645
676#ifdef CONFIG_PCI_MSI 646#ifdef CONFIG_PCI_MSI
677#include <linux/msi.h> 647int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
678#include "../pci/msi.h"
679
680void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
681{ 648{
682 spin_lock(&irq_mapping_update_lock); 649 int rc;
683 650 struct physdev_get_free_pirq op_get_free_pirq;
684 if (alloc & XEN_ALLOC_IRQ) {
685 *irq = find_unbound_irq();
686 if (*irq == -1)
687 goto out;
688 }
689
690 if (alloc & XEN_ALLOC_PIRQ) {
691 *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
692 if (*pirq == -1)
693 goto out;
694 }
695 651
696 set_irq_chip_and_handler_name(*irq, &xen_pirq_chip, 652 op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
697 handle_level_irq, name); 653 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
698 654
699 irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0); 655 WARN_ONCE(rc == -ENOSYS,
700 pirq_to_irq[*pirq] = *irq; 656 "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
701 657
702out: 658 return rc ? -1 : op_get_free_pirq.pirq;
703 spin_unlock(&irq_mapping_update_lock);
704} 659}
705 660
706int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type) 661int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
662 int pirq, int vector, const char *name)
707{ 663{
708 int irq = -1; 664 int irq, ret;
709 struct physdev_map_pirq map_irq;
710 int rc;
711 int pos;
712 u32 table_offset, bir;
713
714 memset(&map_irq, 0, sizeof(map_irq));
715 map_irq.domid = DOMID_SELF;
716 map_irq.type = MAP_PIRQ_TYPE_MSI;
717 map_irq.index = -1;
718 map_irq.pirq = -1;
719 map_irq.bus = dev->bus->number;
720 map_irq.devfn = dev->devfn;
721
722 if (type == PCI_CAP_ID_MSIX) {
723 pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
724
725 pci_read_config_dword(dev, msix_table_offset_reg(pos),
726 &table_offset);
727 bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
728
729 map_irq.table_base = pci_resource_start(dev, bir);
730 map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
731 }
732 665
733 spin_lock(&irq_mapping_update_lock); 666 spin_lock(&irq_mapping_update_lock);
734 667
735 irq = find_unbound_irq(); 668 irq = xen_allocate_irq_dynamic();
736
737 if (irq == -1) 669 if (irq == -1)
738 goto out; 670 goto out;
739 671
740 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
741 if (rc) {
742 printk(KERN_WARNING "xen map irq failed %d\n", rc);
743
744 irq_free_desc(irq);
745
746 irq = -1;
747 goto out;
748 }
749 irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
750
751 set_irq_chip_and_handler_name(irq, &xen_pirq_chip, 672 set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
752 handle_level_irq, 673 handle_level_irq, name);
753 (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
754 674
675 irq_info[irq] = mk_pirq_info(0, pirq, 0, vector);
676 pirq_to_irq[pirq] = irq;
677 ret = irq_set_msi_desc(irq, msidesc);
678 if (ret < 0)
679 goto error_irq;
755out: 680out:
756 spin_unlock(&irq_mapping_update_lock); 681 spin_unlock(&irq_mapping_update_lock);
757 return irq; 682 return irq;
683error_irq:
684 spin_unlock(&irq_mapping_update_lock);
685 xen_free_irq(irq);
686 return -1;
758} 687}
759#endif 688#endif
760 689
@@ -779,11 +708,12 @@ int xen_destroy_irq(int irq)
779 printk(KERN_WARNING "unmap irq failed %d\n", rc); 708 printk(KERN_WARNING "unmap irq failed %d\n", rc);
780 goto out; 709 goto out;
781 } 710 }
782 pirq_to_irq[info->u.pirq.pirq] = -1;
783 } 711 }
712 pirq_to_irq[info->u.pirq.pirq] = -1;
713
784 irq_info[irq] = mk_unbound_info(); 714 irq_info[irq] = mk_unbound_info();
785 715
786 irq_free_desc(irq); 716 xen_free_irq(irq);
787 717
788out: 718out:
789 spin_unlock(&irq_mapping_update_lock); 719 spin_unlock(&irq_mapping_update_lock);
@@ -814,7 +744,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
814 irq = evtchn_to_irq[evtchn]; 744 irq = evtchn_to_irq[evtchn];
815 745
816 if (irq == -1) { 746 if (irq == -1) {
817 irq = find_unbound_irq(); 747 irq = xen_allocate_irq_dynamic();
818 748
819 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, 749 set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
820 handle_fasteoi_irq, "event"); 750 handle_fasteoi_irq, "event");
@@ -839,7 +769,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
839 irq = per_cpu(ipi_to_irq, cpu)[ipi]; 769 irq = per_cpu(ipi_to_irq, cpu)[ipi];
840 770
841 if (irq == -1) { 771 if (irq == -1) {
842 irq = find_unbound_irq(); 772 irq = xen_allocate_irq_dynamic();
843 if (irq < 0) 773 if (irq < 0)
844 goto out; 774 goto out;
845 775
@@ -875,7 +805,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
875 irq = per_cpu(virq_to_irq, cpu)[virq]; 805 irq = per_cpu(virq_to_irq, cpu)[virq];
876 806
877 if (irq == -1) { 807 if (irq == -1) {
878 irq = find_unbound_irq(); 808 irq = xen_allocate_irq_dynamic();
879 809
880 set_irq_chip_and_handler_name(irq, &xen_percpu_chip, 810 set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
881 handle_percpu_irq, "virq"); 811 handle_percpu_irq, "virq");
@@ -934,7 +864,7 @@ static void unbind_from_irq(unsigned int irq)
934 if (irq_info[irq].type != IRQT_UNBOUND) { 864 if (irq_info[irq].type != IRQT_UNBOUND) {
935 irq_info[irq] = mk_unbound_info(); 865 irq_info[irq] = mk_unbound_info();
936 866
937 irq_free_desc(irq); 867 xen_free_irq(irq);
938 } 868 }
939 869
940 spin_unlock(&irq_mapping_update_lock); 870 spin_unlock(&irq_mapping_update_lock);
@@ -990,7 +920,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
990 if (irq < 0) 920 if (irq < 0)
991 return irq; 921 return irq;
992 922
993 irqflags |= IRQF_NO_SUSPEND; 923 irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
994 retval = request_irq(irq, handler, irqflags, devname, dev_id); 924 retval = request_irq(irq, handler, irqflags, devname, dev_id);
995 if (retval != 0) { 925 if (retval != 0) {
996 unbind_from_irq(irq); 926 unbind_from_irq(irq);
@@ -1234,11 +1164,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
1234 return 0; 1164 return 0;
1235} 1165}
1236 1166
1237static int set_affinity_irq(unsigned irq, const struct cpumask *dest) 1167static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1168 bool force)
1238{ 1169{
1239 unsigned tcpu = cpumask_first(dest); 1170 unsigned tcpu = cpumask_first(dest);
1240 1171
1241 return rebind_irq_to_cpu(irq, tcpu); 1172 return rebind_irq_to_cpu(data->irq, tcpu);
1242} 1173}
1243 1174
1244int resend_irq_on_evtchn(unsigned int irq) 1175int resend_irq_on_evtchn(unsigned int irq)
@@ -1257,35 +1188,35 @@ int resend_irq_on_evtchn(unsigned int irq)
1257 return 1; 1188 return 1;
1258} 1189}
1259 1190
1260static void enable_dynirq(unsigned int irq) 1191static void enable_dynirq(struct irq_data *data)
1261{ 1192{
1262 int evtchn = evtchn_from_irq(irq); 1193 int evtchn = evtchn_from_irq(data->irq);
1263 1194
1264 if (VALID_EVTCHN(evtchn)) 1195 if (VALID_EVTCHN(evtchn))
1265 unmask_evtchn(evtchn); 1196 unmask_evtchn(evtchn);
1266} 1197}
1267 1198
1268static void disable_dynirq(unsigned int irq) 1199static void disable_dynirq(struct irq_data *data)
1269{ 1200{
1270 int evtchn = evtchn_from_irq(irq); 1201 int evtchn = evtchn_from_irq(data->irq);
1271 1202
1272 if (VALID_EVTCHN(evtchn)) 1203 if (VALID_EVTCHN(evtchn))
1273 mask_evtchn(evtchn); 1204 mask_evtchn(evtchn);
1274} 1205}
1275 1206
1276static void ack_dynirq(unsigned int irq) 1207static void ack_dynirq(struct irq_data *data)
1277{ 1208{
1278 int evtchn = evtchn_from_irq(irq); 1209 int evtchn = evtchn_from_irq(data->irq);
1279 1210
1280 move_masked_irq(irq); 1211 move_masked_irq(data->irq);
1281 1212
1282 if (VALID_EVTCHN(evtchn)) 1213 if (VALID_EVTCHN(evtchn))
1283 unmask_evtchn(evtchn); 1214 unmask_evtchn(evtchn);
1284} 1215}
1285 1216
1286static int retrigger_dynirq(unsigned int irq) 1217static int retrigger_dynirq(struct irq_data *data)
1287{ 1218{
1288 int evtchn = evtchn_from_irq(irq); 1219 int evtchn = evtchn_from_irq(data->irq);
1289 struct shared_info *sh = HYPERVISOR_shared_info; 1220 struct shared_info *sh = HYPERVISOR_shared_info;
1290 int ret = 0; 1221 int ret = 0;
1291 1222
@@ -1334,7 +1265,7 @@ static void restore_cpu_pirqs(void)
1334 1265
1335 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); 1266 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1336 1267
1337 startup_pirq(irq); 1268 __startup_pirq(irq);
1338 } 1269 }
1339} 1270}
1340 1271
@@ -1445,7 +1376,6 @@ void xen_poll_irq(int irq)
1445void xen_irq_resume(void) 1376void xen_irq_resume(void)
1446{ 1377{
1447 unsigned int cpu, irq, evtchn; 1378 unsigned int cpu, irq, evtchn;
1448 struct irq_desc *desc;
1449 1379
1450 init_evtchn_cpu_bindings(); 1380 init_evtchn_cpu_bindings();
1451 1381
@@ -1465,66 +1395,48 @@ void xen_irq_resume(void)
1465 restore_cpu_ipis(cpu); 1395 restore_cpu_ipis(cpu);
1466 } 1396 }
1467 1397
1468 /*
1469 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
1470 * are not handled by the IRQ core.
1471 */
1472 for_each_irq_desc(irq, desc) {
1473 if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
1474 continue;
1475 if (desc->status & IRQ_DISABLED)
1476 continue;
1477
1478 evtchn = evtchn_from_irq(irq);
1479 if (evtchn == -1)
1480 continue;
1481
1482 unmask_evtchn(evtchn);
1483 }
1484
1485 restore_cpu_pirqs(); 1398 restore_cpu_pirqs();
1486} 1399}
1487 1400
1488static struct irq_chip xen_dynamic_chip __read_mostly = { 1401static struct irq_chip xen_dynamic_chip __read_mostly = {
1489 .name = "xen-dyn", 1402 .name = "xen-dyn",
1490 1403
1491 .disable = disable_dynirq, 1404 .irq_disable = disable_dynirq,
1492 .mask = disable_dynirq, 1405 .irq_mask = disable_dynirq,
1493 .unmask = enable_dynirq, 1406 .irq_unmask = enable_dynirq,
1494 1407
1495 .eoi = ack_dynirq, 1408 .irq_eoi = ack_dynirq,
1496 .set_affinity = set_affinity_irq, 1409 .irq_set_affinity = set_affinity_irq,
1497 .retrigger = retrigger_dynirq, 1410 .irq_retrigger = retrigger_dynirq,
1498}; 1411};
1499 1412
1500static struct irq_chip xen_pirq_chip __read_mostly = { 1413static struct irq_chip xen_pirq_chip __read_mostly = {
1501 .name = "xen-pirq", 1414 .name = "xen-pirq",
1502 1415
1503 .startup = startup_pirq, 1416 .irq_startup = startup_pirq,
1504 .shutdown = shutdown_pirq, 1417 .irq_shutdown = shutdown_pirq,
1505 1418
1506 .enable = enable_pirq, 1419 .irq_enable = enable_pirq,
1507 .unmask = enable_pirq, 1420 .irq_unmask = enable_pirq,
1508 1421
1509 .disable = disable_pirq, 1422 .irq_disable = disable_pirq,
1510 .mask = disable_pirq, 1423 .irq_mask = disable_pirq,
1511 1424
1512 .ack = ack_pirq, 1425 .irq_ack = ack_pirq,
1513 .end = end_pirq,
1514 1426
1515 .set_affinity = set_affinity_irq, 1427 .irq_set_affinity = set_affinity_irq,
1516 1428
1517 .retrigger = retrigger_dynirq, 1429 .irq_retrigger = retrigger_dynirq,
1518}; 1430};
1519 1431
1520static struct irq_chip xen_percpu_chip __read_mostly = { 1432static struct irq_chip xen_percpu_chip __read_mostly = {
1521 .name = "xen-percpu", 1433 .name = "xen-percpu",
1522 1434
1523 .disable = disable_dynirq, 1435 .irq_disable = disable_dynirq,
1524 .mask = disable_dynirq, 1436 .irq_mask = disable_dynirq,
1525 .unmask = enable_dynirq, 1437 .irq_unmask = enable_dynirq,
1526 1438
1527 .ack = ack_dynirq, 1439 .irq_ack = ack_dynirq,
1528}; 1440};
1529 1441
1530int xen_set_callback_via(uint64_t via) 1442int xen_set_callback_via(uint64_t via)
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 24177272bcb8..ebb292859b59 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -34,42 +34,38 @@ enum shutdown_state {
34/* Ignore multiple shutdown requests. */ 34/* Ignore multiple shutdown requests. */
35static enum shutdown_state shutting_down = SHUTDOWN_INVALID; 35static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
36 36
37#ifdef CONFIG_PM_SLEEP 37struct suspend_info {
38static int xen_hvm_suspend(void *data) 38 int cancelled;
39{ 39 unsigned long arg; /* extra hypercall argument */
40 int err; 40 void (*pre)(void);
41 struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; 41 void (*post)(int cancelled);
42 int *cancelled = data; 42};
43
44 BUG_ON(!irqs_disabled());
45
46 err = sysdev_suspend(PMSG_SUSPEND);
47 if (err) {
48 printk(KERN_ERR "xen_hvm_suspend: sysdev_suspend failed: %d\n",
49 err);
50 return err;
51 }
52
53 *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
54 43
55 xen_hvm_post_suspend(*cancelled); 44static void xen_hvm_post_suspend(int cancelled)
45{
46 xen_arch_hvm_post_suspend(cancelled);
56 gnttab_resume(); 47 gnttab_resume();
48}
57 49
58 if (!*cancelled) { 50static void xen_pre_suspend(void)
59 xen_irq_resume(); 51{
60 xen_console_resume(); 52 xen_mm_pin_all();
61 xen_timer_resume(); 53 gnttab_suspend();
62 } 54 xen_arch_pre_suspend();
63 55}
64 sysdev_resume();
65 56
66 return 0; 57static void xen_post_suspend(int cancelled)
58{
59 xen_arch_post_suspend(cancelled);
60 gnttab_resume();
61 xen_mm_unpin_all();
67} 62}
68 63
64#ifdef CONFIG_PM_SLEEP
69static int xen_suspend(void *data) 65static int xen_suspend(void *data)
70{ 66{
67 struct suspend_info *si = data;
71 int err; 68 int err;
72 int *cancelled = data;
73 69
74 BUG_ON(!irqs_disabled()); 70 BUG_ON(!irqs_disabled());
75 71
@@ -80,22 +76,20 @@ static int xen_suspend(void *data)
80 return err; 76 return err;
81 } 77 }
82 78
83 xen_mm_pin_all(); 79 if (si->pre)
84 gnttab_suspend(); 80 si->pre();
85 xen_pre_suspend();
86 81
87 /* 82 /*
88 * This hypercall returns 1 if suspend was cancelled 83 * This hypercall returns 1 if suspend was cancelled
89 * or the domain was merely checkpointed, and 0 if it 84 * or the domain was merely checkpointed, and 0 if it
90 * is resuming in a new domain. 85 * is resuming in a new domain.
91 */ 86 */
92 *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); 87 si->cancelled = HYPERVISOR_suspend(si->arg);
93 88
94 xen_post_suspend(*cancelled); 89 if (si->post)
95 gnttab_resume(); 90 si->post(si->cancelled);
96 xen_mm_unpin_all();
97 91
98 if (!*cancelled) { 92 if (!si->cancelled) {
99 xen_irq_resume(); 93 xen_irq_resume();
100 xen_console_resume(); 94 xen_console_resume();
101 xen_timer_resume(); 95 xen_timer_resume();
@@ -109,7 +103,7 @@ static int xen_suspend(void *data)
109static void do_suspend(void) 103static void do_suspend(void)
110{ 104{
111 int err; 105 int err;
112 int cancelled = 1; 106 struct suspend_info si;
113 107
114 shutting_down = SHUTDOWN_SUSPEND; 108 shutting_down = SHUTDOWN_SUSPEND;
115 109
@@ -139,20 +133,29 @@ static void do_suspend(void)
139 goto out_resume; 133 goto out_resume;
140 } 134 }
141 135
142 if (xen_hvm_domain()) 136 si.cancelled = 1;
143 err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0)); 137
144 else 138 if (xen_hvm_domain()) {
145 err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); 139 si.arg = 0UL;
140 si.pre = NULL;
141 si.post = &xen_hvm_post_suspend;
142 } else {
143 si.arg = virt_to_mfn(xen_start_info);
144 si.pre = &xen_pre_suspend;
145 si.post = &xen_post_suspend;
146 }
147
148 err = stop_machine(xen_suspend, &si, cpumask_of(0));
146 149
147 dpm_resume_noirq(PMSG_RESUME); 150 dpm_resume_noirq(PMSG_RESUME);
148 151
149 if (err) { 152 if (err) {
150 printk(KERN_ERR "failed to start xen_suspend: %d\n", err); 153 printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
151 cancelled = 1; 154 si.cancelled = 1;
152 } 155 }
153 156
154out_resume: 157out_resume:
155 if (!cancelled) { 158 if (!si.cancelled) {
156 xen_arch_resume(); 159 xen_arch_resume();
157 xs_resume(); 160 xs_resume();
158 } else 161 } else
@@ -172,12 +175,39 @@ out:
172} 175}
173#endif /* CONFIG_PM_SLEEP */ 176#endif /* CONFIG_PM_SLEEP */
174 177
178struct shutdown_handler {
179 const char *command;
180 void (*cb)(void);
181};
182
183static void do_poweroff(void)
184{
185 shutting_down = SHUTDOWN_POWEROFF;
186 orderly_poweroff(false);
187}
188
189static void do_reboot(void)
190{
191 shutting_down = SHUTDOWN_POWEROFF; /* ? */
192 ctrl_alt_del();
193}
194
175static void shutdown_handler(struct xenbus_watch *watch, 195static void shutdown_handler(struct xenbus_watch *watch,
176 const char **vec, unsigned int len) 196 const char **vec, unsigned int len)
177{ 197{
178 char *str; 198 char *str;
179 struct xenbus_transaction xbt; 199 struct xenbus_transaction xbt;
180 int err; 200 int err;
201 static struct shutdown_handler handlers[] = {
202 { "poweroff", do_poweroff },
203 { "halt", do_poweroff },
204 { "reboot", do_reboot },
205#ifdef CONFIG_PM_SLEEP
206 { "suspend", do_suspend },
207#endif
208 {NULL, NULL},
209 };
210 static struct shutdown_handler *handler;
181 211
182 if (shutting_down != SHUTDOWN_INVALID) 212 if (shutting_down != SHUTDOWN_INVALID)
183 return; 213 return;
@@ -194,7 +224,14 @@ static void shutdown_handler(struct xenbus_watch *watch,
194 return; 224 return;
195 } 225 }
196 226
197 xenbus_write(xbt, "control", "shutdown", ""); 227 for (handler = &handlers[0]; handler->command; handler++) {
228 if (strcmp(str, handler->command) == 0)
229 break;
230 }
231
232 /* Only acknowledge commands which we are prepared to handle. */
233 if (handler->cb)
234 xenbus_write(xbt, "control", "shutdown", "");
198 235
199 err = xenbus_transaction_end(xbt, 0); 236 err = xenbus_transaction_end(xbt, 0);
200 if (err == -EAGAIN) { 237 if (err == -EAGAIN) {
@@ -202,17 +239,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
202 goto again; 239 goto again;
203 } 240 }
204 241
205 if (strcmp(str, "poweroff") == 0 || 242 if (handler->cb) {
206 strcmp(str, "halt") == 0) { 243 handler->cb();
207 shutting_down = SHUTDOWN_POWEROFF;
208 orderly_poweroff(false);
209 } else if (strcmp(str, "reboot") == 0) {
210 shutting_down = SHUTDOWN_POWEROFF; /* ? */
211 ctrl_alt_del();
212#ifdef CONFIG_PM_SLEEP
213 } else if (strcmp(str, "suspend") == 0) {
214 do_suspend();
215#endif
216 } else { 244 } else {
217 printk(KERN_INFO "Ignoring shutdown request: %s\n", str); 245 printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
218 shutting_down = SHUTDOWN_INVALID; 246 shutting_down = SHUTDOWN_INVALID;
@@ -291,27 +319,18 @@ static int shutdown_event(struct notifier_block *notifier,
291 return NOTIFY_DONE; 319 return NOTIFY_DONE;
292} 320}
293 321
294static int __init __setup_shutdown_event(void)
295{
296 /* Delay initialization in the PV on HVM case */
297 if (xen_hvm_domain())
298 return 0;
299
300 if (!xen_pv_domain())
301 return -ENODEV;
302
303 return xen_setup_shutdown_event();
304}
305
306int xen_setup_shutdown_event(void) 322int xen_setup_shutdown_event(void)
307{ 323{
308 static struct notifier_block xenstore_notifier = { 324 static struct notifier_block xenstore_notifier = {
309 .notifier_call = shutdown_event 325 .notifier_call = shutdown_event
310 }; 326 };
327
328 if (!xen_domain())
329 return -ENODEV;
311 register_xenstore_notifier(&xenstore_notifier); 330 register_xenstore_notifier(&xenstore_notifier);
312 331
313 return 0; 332 return 0;
314} 333}
315EXPORT_SYMBOL_GPL(xen_setup_shutdown_event); 334EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
316 335
317subsys_initcall(__setup_shutdown_event); 336subsys_initcall(xen_setup_shutdown_event);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index afbe041f42c5..319dd0a94d51 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -156,9 +156,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
156 if (ret) 156 if (ret)
157 goto out; 157 goto out;
158 xenbus_probe(NULL); 158 xenbus_probe(NULL);
159 ret = xen_setup_shutdown_event();
160 if (ret)
161 goto out;
162 return 0; 159 return 0;
163 160
164out: 161out:
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57edc..7cb53aafac1e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
47 def_bool n 47 def_bool n
48 48
49config EXPORTFS 49config EXPORTFS
50 tristate 50 bool
51 51
52config FILE_LOCKING 52config FILE_LOCKING
53 bool "Enable POSIX file locking API" if EXPERT 53 bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c8..ba01202844c5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
48obj-$(CONFIG_NFS_COMMON) += nfs_common/ 48obj-$(CONFIG_NFS_COMMON) += nfs_common/
49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
50 50
51obj-$(CONFIG_FHANDLE) += fhandle.o
52
51obj-y += quota/ 53obj-y += quota/
52 54
53obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
140 candidate->first = candidate->last = index; 140 candidate->first = candidate->last = index;
141 candidate->offset_first = from; 141 candidate->offset_first = from;
142 candidate->to_last = to; 142 candidate->to_last = to;
143 INIT_LIST_HEAD(&candidate->link);
143 candidate->usage = 1; 144 candidate->usage = 1;
144 candidate->state = AFS_WBACK_PENDING; 145 candidate->state = AFS_WBACK_PENDING;
145 init_waitqueue_head(&candidate->waitq); 146 init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a9..26869cde3953 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
239 call_rcu(&ctx->rcu_head, ctx_rcu_free); 239 call_rcu(&ctx->rcu_head, ctx_rcu_free);
240} 240}
241 241
242#define get_ioctx(kioctx) do { \ 242static inline void get_ioctx(struct kioctx *kioctx)
243 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 243{
244 atomic_inc(&(kioctx)->users); \ 244 BUG_ON(atomic_read(&kioctx->users) <= 0);
245} while (0) 245 atomic_inc(&kioctx->users);
246#define put_ioctx(kioctx) do { \ 246}
247 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ 247
248 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ 248static inline int try_get_ioctx(struct kioctx *kioctx)
249 __put_ioctx(kioctx); \ 249{
250} while (0) 250 return atomic_inc_not_zero(&kioctx->users);
251}
252
253static inline void put_ioctx(struct kioctx *kioctx)
254{
255 BUG_ON(atomic_read(&kioctx->users) <= 0);
256 if (unlikely(atomic_dec_and_test(&kioctx->users)))
257 __put_ioctx(kioctx);
258}
251 259
252/* ioctx_alloc 260/* ioctx_alloc
253 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 261 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
601 rcu_read_lock(); 609 rcu_read_lock();
602 610
603 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 611 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
604 if (ctx->user_id == ctx_id && !ctx->dead) { 612 /*
605 get_ioctx(ctx); 613 * RCU protects us against accessing freed memory but
614 * we have to be careful not to get a reference when the
615 * reference count already dropped to 0 (ctx->dead test
616 * is unreliable because of races).
617 */
618 if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
606 ret = ctx; 619 ret = ctx;
607 break; 620 break;
608 } 621 }
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1629 goto out_put_req; 1642 goto out_put_req;
1630 1643
1631 spin_lock_irq(&ctx->ctx_lock); 1644 spin_lock_irq(&ctx->ctx_lock);
1645 /*
1646 * We could have raced with io_destroy() and are currently holding a
1647 * reference to ctx which should be destroyed. We cannot submit IO
1648 * since ctx gets freed as soon as io_submit() puts its reference. The
1649 * check here is reliable: io_destroy() sets ctx->dead before waiting
1650 * for outstanding IO and the barrier between these two is realized by
1651 * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
1652 * increment ctx->reqs_active before checking for ctx->dead and the
1653 * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
1654 * don't see ctx->dead set here, io_destroy() waits for our IO to
1655 * finish.
1656 */
1657 if (ctx->dead) {
1658 spin_unlock_irq(&ctx->ctx_lock);
1659 ret = -EINVAL;
1660 goto out_put_req;
1661 }
1632 aio_run_iocb(req); 1662 aio_run_iocb(req);
1633 if (!list_empty(&ctx->run_list)) { 1663 if (!list_empty(&ctx->run_list)) {
1634 /* drain the run list */ 1664 /* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4fb8a3431531..889287019599 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 873 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
874 if (ret) 874 if (ret)
875 goto out_del; 875 goto out_del;
876 /*
877 * bdev could be deleted beneath us which would implicitly destroy
878 * the holder directory. Hold on to it.
879 */
880 kobject_get(bdev->bd_part->holder_dir);
876 881
877 list_add(&holder->list, &bdev->bd_holder_disks); 882 list_add(&holder->list, &bdev->bd_holder_disks);
878 goto out_unlock; 883 goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
909 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 914 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
910 del_symlink(bdev->bd_part->holder_dir, 915 del_symlink(bdev->bd_part->holder_dir,
911 &disk_to_dev(disk)->kobj); 916 &disk_to_dev(disk)->kobj);
917 kobject_put(bdev->bd_part->holder_dir);
912 list_del_init(&holder->list); 918 list_del_init(&holder->list);
913 kfree(holder); 919 kfree(holder);
914 } 920 }
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
922 * flush_disk - invalidates all buffer-cache entries on a disk 928 * flush_disk - invalidates all buffer-cache entries on a disk
923 * 929 *
924 * @bdev: struct block device to be flushed 930 * @bdev: struct block device to be flushed
931 * @kill_dirty: flag to guide handling of dirty inodes
925 * 932 *
926 * Invalidates all buffer-cache entries on a disk. It should be called 933 * Invalidates all buffer-cache entries on a disk. It should be called
927 * when a disk has been changed -- either by a media change or online 934 * when a disk has been changed -- either by a media change or online
928 * resize. 935 * resize.
929 */ 936 */
930static void flush_disk(struct block_device *bdev) 937static void flush_disk(struct block_device *bdev, bool kill_dirty)
931{ 938{
932 if (__invalidate_device(bdev)) { 939 if (__invalidate_device(bdev, kill_dirty)) {
933 char name[BDEVNAME_SIZE] = ""; 940 char name[BDEVNAME_SIZE] = "";
934 941
935 if (bdev->bd_disk) 942 if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
966 "%s: detected capacity change from %lld to %lld\n", 973 "%s: detected capacity change from %lld to %lld\n",
967 name, bdev_size, disk_size); 974 name, bdev_size, disk_size);
968 i_size_write(bdev->bd_inode, disk_size); 975 i_size_write(bdev->bd_inode, disk_size);
969 flush_disk(bdev); 976 flush_disk(bdev, false);
970 } 977 }
971} 978}
972EXPORT_SYMBOL(check_disk_size_change); 979EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
1019 if (!(events & DISK_EVENT_MEDIA_CHANGE)) 1026 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1020 return 0; 1027 return 0;
1021 1028
1022 flush_disk(bdev); 1029 flush_disk(bdev, true);
1023 if (bdops->revalidate_disk) 1030 if (bdops->revalidate_disk)
1024 bdops->revalidate_disk(bdev->bd_disk); 1031 bdops->revalidate_disk(bdev->bd_disk);
1025 return 1; 1032 return 1;
@@ -1600,7 +1607,7 @@ fail:
1600} 1607}
1601EXPORT_SYMBOL(lookup_bdev); 1608EXPORT_SYMBOL(lookup_bdev);
1602 1609
1603int __invalidate_device(struct block_device *bdev) 1610int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1604{ 1611{
1605 struct super_block *sb = get_super(bdev); 1612 struct super_block *sb = get_super(bdev);
1606 int res = 0; 1613 int res = 0;
@@ -1613,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
1613 * hold). 1620 * hold).
1614 */ 1621 */
1615 shrink_dcache_sb(sb); 1622 shrink_dcache_sb(sb);
1616 res = invalidate_inodes(sb); 1623 res = invalidate_inodes(sb, kill_dirty);
1617 drop_super(sb); 1624 drop_super(sb);
1618 } 1625 }
1619 invalidate_bdev(bdev); 1626 invalidate_bdev(bdev);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
729 u64 disk_total; /* total bytes on disk, takes mirrors into 729 u64 disk_total; /* total bytes on disk, takes mirrors into
730 account */ 730 account */
731 731
732 /*
733 * we bump reservation progress every time we decrement
734 * bytes_reserved. This way people waiting for reservations
735 * know something good has happened and they can check
736 * for progress. The number here isn't to be trusted, it
737 * just shows reclaim activity
738 */
739 unsigned long reservation_progress;
740
732 int full; /* indicates that we cannot allocate any more 741 int full; /* indicates that we cannot allocate any more
733 chunks for this space */ 742 chunks for this space */
734 int force_alloc; /* set if we need to force a chunk alloc for 743 int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
1254#define BTRFS_MOUNT_SPACE_CACHE (1 << 12) 1263#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1255#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) 1264#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1256#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1265#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1266#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1257 1267
1258#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1268#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1259#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1269#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
2218 u64 start, u64 end); 2228 u64 start, u64 end);
2219int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 2229int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
2220 u64 num_bytes); 2230 u64 num_bytes);
2231int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
2232 struct btrfs_root *root, u64 type);
2221 2233
2222/* ctree.c */ 2234/* ctree.c */
2223int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2235int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b2..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
21 int len = *max_len; 21 int len = *max_len;
22 int type; 22 int type;
23 23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 24 if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 25 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
26 return 255; 26 return 255;
27 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
28 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 return 255;
30 }
27 31
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc01439..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3342 u64 max_reclaim; 3342 u64 max_reclaim;
3343 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left; 3344 long time_left;
3345 int pause = 1;
3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3345 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0; 3346 int loops = 0;
3347 unsigned long progress;
3348 3348
3349 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3350 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
3351 3351
3352 smp_mb(); 3352 smp_mb();
3353 reserved = space_info->bytes_reserved; 3353 reserved = space_info->bytes_reserved;
3354 progress = space_info->reservation_progress;
3354 3355
3355 if (reserved == 0) 3356 if (reserved == 0)
3356 return 0; 3357 return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3366 3367
3367 spin_lock(&space_info->lock); 3368 spin_lock(&space_info->lock);
3368 if (reserved > space_info->bytes_reserved) { 3369 if (reserved > space_info->bytes_reserved)
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3374 reserved = space_info->bytes_reserved; 3371 reserved = space_info->bytes_reserved;
3375 spin_unlock(&space_info->lock); 3372 spin_unlock(&space_info->lock);
3376 3373
3374 loops++;
3375
3377 if (reserved == 0 || reclaimed >= max_reclaim) 3376 if (reserved == 0 || reclaimed >= max_reclaim)
3378 break; 3377 break;
3379 3378
3380 if (trans && trans->transaction->blocked) 3379 if (trans && trans->transaction->blocked)
3381 return -EAGAIN; 3380 return -EAGAIN;
3382 3381
3383 __set_current_state(TASK_INTERRUPTIBLE); 3382 time_left = schedule_timeout_interruptible(1);
3384 time_left = schedule_timeout(pause);
3385 3383
3386 /* We were interrupted, exit */ 3384 /* We were interrupted, exit */
3387 if (time_left) 3385 if (time_left)
3388 break; 3386 break;
3389 3387
3390 pause <<= 1; 3388 /* we've kicked the IO a few times, if anything has been freed,
3391 if (pause > HZ / 10) 3389 * exit. There is no sense in looping here for a long time
3392 pause = HZ / 10; 3390 * when we really need to commit the transaction, or there are
3391 * just too many writers without enough free space
3392 */
3393
3394 if (loops > 3) {
3395 smp_mb();
3396 if (progress != space_info->reservation_progress)
3397 break;
3398 }
3393 3399
3394 } 3400 }
3395 return reclaimed >= to_reclaim; 3401 return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3612 if (num_bytes) { 3618 if (num_bytes) {
3613 spin_lock(&space_info->lock); 3619 spin_lock(&space_info->lock);
3614 space_info->bytes_reserved -= num_bytes; 3620 space_info->bytes_reserved -= num_bytes;
3621 space_info->reservation_progress++;
3615 spin_unlock(&space_info->lock); 3622 spin_unlock(&space_info->lock);
3616 } 3623 }
3617 } 3624 }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3844 if (block_rsv->reserved >= block_rsv->size) { 3851 if (block_rsv->reserved >= block_rsv->size) {
3845 num_bytes = block_rsv->reserved - block_rsv->size; 3852 num_bytes = block_rsv->reserved - block_rsv->size;
3846 sinfo->bytes_reserved -= num_bytes; 3853 sinfo->bytes_reserved -= num_bytes;
3854 sinfo->reservation_progress++;
3847 block_rsv->reserved = block_rsv->size; 3855 block_rsv->reserved = block_rsv->size;
3848 block_rsv->full = 1; 3856 block_rsv->full = 1;
3849 } 3857 }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4005 to_reserve = 0; 4013 to_reserve = 0;
4006 } 4014 }
4007 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4015 spin_unlock(&BTRFS_I(inode)->accounting_lock);
4008
4009 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4016 to_reserve += calc_csum_metadata_size(inode, num_bytes);
4010 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4017 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4011 if (ret) 4018 if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133 btrfs_set_block_group_used(&cache->item, old_val); 4140 btrfs_set_block_group_used(&cache->item, old_val);
4134 cache->reserved -= num_bytes; 4141 cache->reserved -= num_bytes;
4135 cache->space_info->bytes_reserved -= num_bytes; 4142 cache->space_info->bytes_reserved -= num_bytes;
4143 cache->space_info->reservation_progress++;
4136 cache->space_info->bytes_used += num_bytes; 4144 cache->space_info->bytes_used += num_bytes;
4137 cache->space_info->disk_used += num_bytes * factor; 4145 cache->space_info->disk_used += num_bytes * factor;
4138 spin_unlock(&cache->lock); 4146 spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
4184 if (reserved) { 4192 if (reserved) {
4185 cache->reserved -= num_bytes; 4193 cache->reserved -= num_bytes;
4186 cache->space_info->bytes_reserved -= num_bytes; 4194 cache->space_info->bytes_reserved -= num_bytes;
4195 cache->space_info->reservation_progress++;
4187 } 4196 }
4188 spin_unlock(&cache->lock); 4197 spin_unlock(&cache->lock);
4189 spin_unlock(&cache->space_info->lock); 4198 spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
4234 space_info->bytes_readonly += num_bytes; 4243 space_info->bytes_readonly += num_bytes;
4235 cache->reserved -= num_bytes; 4244 cache->reserved -= num_bytes;
4236 space_info->bytes_reserved -= num_bytes; 4245 space_info->bytes_reserved -= num_bytes;
4246 space_info->reservation_progress++;
4237 } 4247 }
4238 spin_unlock(&cache->lock); 4248 spin_unlock(&cache->lock);
4239 spin_unlock(&space_info->lock); 4249 spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4712 if (ret) { 4722 if (ret) {
4713 spin_lock(&cache->space_info->lock); 4723 spin_lock(&cache->space_info->lock);
4714 cache->space_info->bytes_reserved -= buf->len; 4724 cache->space_info->bytes_reserved -= buf->len;
4725 cache->space_info->reservation_progress++;
4715 spin_unlock(&cache->space_info->lock); 4726 spin_unlock(&cache->space_info->lock);
4716 } 4727 }
4717 goto out; 4728 goto out;
@@ -5376,7 +5387,7 @@ again:
5376 num_bytes, data, 1); 5387 num_bytes, data, 1);
5377 goto again; 5388 goto again;
5378 } 5389 }
5379 if (ret == -ENOSPC) { 5390 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5380 struct btrfs_space_info *sinfo; 5391 struct btrfs_space_info *sinfo;
5381 5392
5382 sinfo = __find_space_info(root->fs_info, data); 5393 sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8076,13 @@ out:
8065 return ret; 8076 return ret;
8066} 8077}
8067 8078
8079int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8080 struct btrfs_root *root, u64 type)
8081{
8082 u64 alloc_flags = get_alloc_profile(root, type);
8083 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
8084}
8085
8068/* 8086/*
8069 * helper to account the unused space of all the readonly block group in the 8087 * helper to account the unused space of all the readonly block group in the
8070 * list. takes mirrors into account. 8088 * list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 92ac5192c518..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1433 */ 1433 */
1434u64 count_range_bits(struct extent_io_tree *tree, 1434u64 count_range_bits(struct extent_io_tree *tree,
1435 u64 *start, u64 search_end, u64 max_bytes, 1435 u64 *start, u64 search_end, u64 max_bytes,
1436 unsigned long bits) 1436 unsigned long bits, int contig)
1437{ 1437{
1438 struct rb_node *node; 1438 struct rb_node *node;
1439 struct extent_state *state; 1439 struct extent_state *state;
1440 u64 cur_start = *start; 1440 u64 cur_start = *start;
1441 u64 total_bytes = 0; 1441 u64 total_bytes = 0;
1442 u64 last = 0;
1442 int found = 0; 1443 int found = 0;
1443 1444
1444 if (search_end <= cur_start) { 1445 if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1463 state = rb_entry(node, struct extent_state, rb_node); 1464 state = rb_entry(node, struct extent_state, rb_node);
1464 if (state->start > search_end) 1465 if (state->start > search_end)
1465 break; 1466 break;
1466 if (state->end >= cur_start && (state->state & bits)) { 1467 if (contig && found && state->start > last + 1)
1468 break;
1469 if (state->end >= cur_start && (state->state & bits) == bits) {
1467 total_bytes += min(search_end, state->end) + 1 - 1470 total_bytes += min(search_end, state->end) + 1 -
1468 max(cur_start, state->start); 1471 max(cur_start, state->start);
1469 if (total_bytes >= max_bytes) 1472 if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
1472 *start = state->start; 1475 *start = state->start;
1473 found = 1; 1476 found = 1;
1474 } 1477 }
1478 last = state->end;
1479 } else if (contig && found) {
1480 break;
1475 } 1481 }
1476 node = rb_next(node); 1482 node = rb_next(node);
1477 if (!node) 1483 if (!node)
@@ -2912,6 +2918,46 @@ out:
2912 return sector; 2918 return sector;
2913} 2919}
2914 2920
2921/*
2922 * helper function for fiemap, which doesn't want to see any holes.
2923 * This maps until we find something past 'last'
2924 */
2925static struct extent_map *get_extent_skip_holes(struct inode *inode,
2926 u64 offset,
2927 u64 last,
2928 get_extent_t *get_extent)
2929{
2930 u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
2931 struct extent_map *em;
2932 u64 len;
2933
2934 if (offset >= last)
2935 return NULL;
2936
2937 while(1) {
2938 len = last - offset;
2939 if (len == 0)
2940 break;
2941 len = (len + sectorsize - 1) & ~(sectorsize - 1);
2942 em = get_extent(inode, NULL, 0, offset, len, 0);
2943 if (!em || IS_ERR(em))
2944 return em;
2945
2946 /* if this isn't a hole return it */
2947 if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
2948 em->block_start != EXTENT_MAP_HOLE) {
2949 return em;
2950 }
2951
2952 /* this is a hole, advance to the next extent */
2953 offset = extent_map_end(em);
2954 free_extent_map(em);
2955 if (offset >= last)
2956 break;
2957 }
2958 return NULL;
2959}
2960
2915int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2961int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2916 __u64 start, __u64 len, get_extent_t *get_extent) 2962 __u64 start, __u64 len, get_extent_t *get_extent)
2917{ 2963{
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2921 u32 flags = 0; 2967 u32 flags = 0;
2922 u32 found_type; 2968 u32 found_type;
2923 u64 last; 2969 u64 last;
2970 u64 last_for_get_extent = 0;
2924 u64 disko = 0; 2971 u64 disko = 0;
2972 u64 isize = i_size_read(inode);
2925 struct btrfs_key found_key; 2973 struct btrfs_key found_key;
2926 struct extent_map *em = NULL; 2974 struct extent_map *em = NULL;
2927 struct extent_state *cached_state = NULL; 2975 struct extent_state *cached_state = NULL;
2928 struct btrfs_path *path; 2976 struct btrfs_path *path;
2929 struct btrfs_file_extent_item *item; 2977 struct btrfs_file_extent_item *item;
2930 int end = 0; 2978 int end = 0;
2931 u64 em_start = 0, em_len = 0; 2979 u64 em_start = 0;
2980 u64 em_len = 0;
2981 u64 em_end = 0;
2932 unsigned long emflags; 2982 unsigned long emflags;
2933 int hole = 0;
2934 2983
2935 if (len == 0) 2984 if (len == 0)
2936 return -EINVAL; 2985 return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2940 return -ENOMEM; 2989 return -ENOMEM;
2941 path->leave_spinning = 1; 2990 path->leave_spinning = 1;
2942 2991
2992 /*
2993 * lookup the last file extent. We're not using i_size here
2994 * because there might be preallocation past i_size
2995 */
2943 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, 2996 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2944 path, inode->i_ino, -1, 0); 2997 path, inode->i_ino, -1, 0);
2945 if (ret < 0) { 2998 if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2953 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 3006 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2954 found_type = btrfs_key_type(&found_key); 3007 found_type = btrfs_key_type(&found_key);
2955 3008
2956 /* No extents, just return */ 3009 /* No extents, but there might be delalloc bits */
2957 if (found_key.objectid != inode->i_ino || 3010 if (found_key.objectid != inode->i_ino ||
2958 found_type != BTRFS_EXTENT_DATA_KEY) { 3011 found_type != BTRFS_EXTENT_DATA_KEY) {
2959 btrfs_free_path(path); 3012 /* have to trust i_size as the end */
2960 return 0; 3013 last = (u64)-1;
3014 last_for_get_extent = isize;
3015 } else {
3016 /*
3017 * remember the start of the last extent. There are a
3018 * bunch of different factors that go into the length of the
3019 * extent, so its much less complex to remember where it started
3020 */
3021 last = found_key.offset;
3022 last_for_get_extent = last + 1;
2961 } 3023 }
2962 last = found_key.offset;
2963 btrfs_free_path(path); 3024 btrfs_free_path(path);
2964 3025
3026 /*
3027 * we might have some extents allocated but more delalloc past those
3028 * extents. so, we trust isize unless the start of the last extent is
3029 * beyond isize
3030 */
3031 if (last < isize) {
3032 last = (u64)-1;
3033 last_for_get_extent = isize;
3034 }
3035
2965 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3036 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2966 &cached_state, GFP_NOFS); 3037 &cached_state, GFP_NOFS);
2967 em = get_extent(inode, NULL, 0, off, max - off, 0); 3038
3039 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3040 get_extent);
2968 if (!em) 3041 if (!em)
2969 goto out; 3042 goto out;
2970 if (IS_ERR(em)) { 3043 if (IS_ERR(em)) {
@@ -2973,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2973 } 3046 }
2974 3047
2975 while (!end) { 3048 while (!end) {
2976 hole = 0; 3049 u64 offset_in_extent;
2977 off = em->start + em->len;
2978 if (off >= max)
2979 end = 1;
2980 3050
2981 if (em->block_start == EXTENT_MAP_HOLE) { 3051 /* break if the extent we found is outside the range */
2982 hole = 1; 3052 if (em->start >= max || extent_map_end(em) < off)
2983 goto next; 3053 break;
2984 }
2985 3054
2986 em_start = em->start; 3055 /*
2987 em_len = em->len; 3056 * get_extent may return an extent that starts before our
3057 * requested range. We have to make sure the ranges
3058 * we return to fiemap always move forward and don't
3059 * overlap, so adjust the offsets here
3060 */
3061 em_start = max(em->start, off);
2988 3062
3063 /*
3064 * record the offset from the start of the extent
3065 * for adjusting the disk offset below
3066 */
3067 offset_in_extent = em_start - em->start;
3068 em_end = extent_map_end(em);
3069 em_len = em_end - em_start;
3070 emflags = em->flags;
2989 disko = 0; 3071 disko = 0;
2990 flags = 0; 3072 flags = 0;
2991 3073
3074 /*
3075 * bump off for our next call to get_extent
3076 */
3077 off = extent_map_end(em);
3078 if (off >= max)
3079 end = 1;
3080
2992 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 3081 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
2993 end = 1; 3082 end = 1;
2994 flags |= FIEMAP_EXTENT_LAST; 3083 flags |= FIEMAP_EXTENT_LAST;
@@ -2999,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2999 flags |= (FIEMAP_EXTENT_DELALLOC | 3088 flags |= (FIEMAP_EXTENT_DELALLOC |
3000 FIEMAP_EXTENT_UNKNOWN); 3089 FIEMAP_EXTENT_UNKNOWN);
3001 } else { 3090 } else {
3002 disko = em->block_start; 3091 disko = em->block_start + offset_in_extent;
3003 } 3092 }
3004 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3093 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3005 flags |= FIEMAP_EXTENT_ENCODED; 3094 flags |= FIEMAP_EXTENT_ENCODED;
3006 3095
3007next:
3008 emflags = em->flags;
3009 free_extent_map(em); 3096 free_extent_map(em);
3010 em = NULL; 3097 em = NULL;
3011 if (!end) { 3098 if ((em_start >= last) || em_len == (u64)-1 ||
3012 em = get_extent(inode, NULL, 0, off, max - off, 0); 3099 (last == (u64)-1 && isize <= em_end)) {
3013 if (!em)
3014 goto out;
3015 if (IS_ERR(em)) {
3016 ret = PTR_ERR(em);
3017 goto out;
3018 }
3019 emflags = em->flags;
3020 }
3021
3022 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
3023 flags |= FIEMAP_EXTENT_LAST; 3100 flags |= FIEMAP_EXTENT_LAST;
3024 end = 1; 3101 end = 1;
3025 } 3102 }
3026 3103
3027 if (em_start == last) { 3104 /* now scan forward to see if this is really the last extent. */
3105 em = get_extent_skip_holes(inode, off, last_for_get_extent,
3106 get_extent);
3107 if (IS_ERR(em)) {
3108 ret = PTR_ERR(em);
3109 goto out;
3110 }
3111 if (!em) {
3028 flags |= FIEMAP_EXTENT_LAST; 3112 flags |= FIEMAP_EXTENT_LAST;
3029 end = 1; 3113 end = 1;
3030 } 3114 }
3031 3115 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3032 if (!hole) { 3116 em_len, flags);
3033 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3117 if (ret)
3034 em_len, flags); 3118 goto out_free;
3035 if (ret)
3036 goto out_free;
3037 }
3038 } 3119 }
3039out_free: 3120out_free:
3040 free_extent_map(em); 3121 free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
191 191
192u64 count_range_bits(struct extent_io_tree *tree, 192u64 count_range_bits(struct extent_io_tree *tree,
193 u64 *start, u64 search_end, 193 u64 *start, u64 search_end,
194 u64 max_bytes, unsigned long bits); 194 u64 max_bytes, unsigned long bits, int contig);
195 195
196void free_extent_state(struct extent_state *state); 196void free_extent_state(struct extent_state *state);
197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 197int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d5940..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
70 70
71 /* Flush processor's dcache for this page */ 71 /* Flush processor's dcache for this page */
72 flush_dcache_page(page); 72 flush_dcache_page(page);
73
74 /*
75 * if we get a partial write, we can end up with
76 * partially up to date pages. These add
77 * a lot of complexity, so make sure they don't
78 * happen by forcing this copy to be retried.
79 *
80 * The rest of the btrfs_file_write code will fall
81 * back to page at a time copies after we return 0.
82 */
83 if (!PageUptodate(page) && copied < count)
84 copied = 0;
85
73 iov_iter_advance(i, copied); 86 iov_iter_advance(i, copied);
74 write_bytes -= copied; 87 write_bytes -= copied;
75 total_copied += copied; 88 total_copied += copied;
@@ -763,6 +776,27 @@ out:
763} 776}
764 777
765/* 778/*
779 * on error we return an unlocked page and the error value
780 * on success we return a locked page and 0
781 */
782static int prepare_uptodate_page(struct page *page, u64 pos)
783{
784 int ret = 0;
785
786 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
787 ret = btrfs_readpage(NULL, page);
788 if (ret)
789 return ret;
790 lock_page(page);
791 if (!PageUptodate(page)) {
792 unlock_page(page);
793 return -EIO;
794 }
795 }
796 return 0;
797}
798
799/*
766 * this gets pages into the page cache and locks them down, it also properly 800 * this gets pages into the page cache and locks them down, it also properly
767 * waits for data=ordered extents to finish before allowing the pages to be 801 * waits for data=ordered extents to finish before allowing the pages to be
768 * modified. 802 * modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
777 unsigned long index = pos >> PAGE_CACHE_SHIFT; 811 unsigned long index = pos >> PAGE_CACHE_SHIFT;
778 struct inode *inode = fdentry(file)->d_inode; 812 struct inode *inode = fdentry(file)->d_inode;
779 int err = 0; 813 int err = 0;
814 int faili = 0;
780 u64 start_pos; 815 u64 start_pos;
781 u64 last_pos; 816 u64 last_pos;
782 817
@@ -794,15 +829,24 @@ again:
794 for (i = 0; i < num_pages; i++) { 829 for (i = 0; i < num_pages; i++) {
795 pages[i] = grab_cache_page(inode->i_mapping, index + i); 830 pages[i] = grab_cache_page(inode->i_mapping, index + i);
796 if (!pages[i]) { 831 if (!pages[i]) {
797 int c; 832 faili = i - 1;
798 for (c = i - 1; c >= 0; c--) { 833 err = -ENOMEM;
799 unlock_page(pages[c]); 834 goto fail;
800 page_cache_release(pages[c]); 835 }
801 } 836
802 return -ENOMEM; 837 if (i == 0)
838 err = prepare_uptodate_page(pages[i], pos);
839 if (i == num_pages - 1)
840 err = prepare_uptodate_page(pages[i],
841 pos + write_bytes);
842 if (err) {
843 page_cache_release(pages[i]);
844 faili = i - 1;
845 goto fail;
803 } 846 }
804 wait_on_page_writeback(pages[i]); 847 wait_on_page_writeback(pages[i]);
805 } 848 }
849 err = 0;
806 if (start_pos < inode->i_size) { 850 if (start_pos < inode->i_size) {
807 struct btrfs_ordered_extent *ordered; 851 struct btrfs_ordered_extent *ordered;
808 lock_extent_bits(&BTRFS_I(inode)->io_tree, 852 lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
842 WARN_ON(!PageLocked(pages[i])); 886 WARN_ON(!PageLocked(pages[i]));
843 } 887 }
844 return 0; 888 return 0;
889fail:
890 while (faili >= 0) {
891 unlock_page(pages[faili]);
892 page_cache_release(pages[faili]);
893 faili--;
894 }
895 return err;
896
845} 897}
846 898
847static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 899static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
851 struct file *file = iocb->ki_filp; 903 struct file *file = iocb->ki_filp;
852 struct inode *inode = fdentry(file)->d_inode; 904 struct inode *inode = fdentry(file)->d_inode;
853 struct btrfs_root *root = BTRFS_I(inode)->root; 905 struct btrfs_root *root = BTRFS_I(inode)->root;
854 struct page *pinned[2];
855 struct page **pages = NULL; 906 struct page **pages = NULL;
856 struct iov_iter i; 907 struct iov_iter i;
857 loff_t *ppos = &iocb->ki_pos; 908 loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
872 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
873 (file->f_flags & O_DIRECT)); 924 (file->f_flags & O_DIRECT));
874 925
875 pinned[0] = NULL;
876 pinned[1] = NULL;
877
878 start_pos = pos; 926 start_pos = pos;
879 927
880 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
962 first_index = pos >> PAGE_CACHE_SHIFT; 1010 first_index = pos >> PAGE_CACHE_SHIFT;
963 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
964 1012
965 /*
966 * there are lots of better ways to do this, but this code
967 * makes sure the first and last page in the file range are
968 * up to date and ready for cow
969 */
970 if ((pos & (PAGE_CACHE_SIZE - 1))) {
971 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
972 if (!PageUptodate(pinned[0])) {
973 ret = btrfs_readpage(NULL, pinned[0]);
974 BUG_ON(ret);
975 wait_on_page_locked(pinned[0]);
976 } else {
977 unlock_page(pinned[0]);
978 }
979 }
980 if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
981 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
982 if (!PageUptodate(pinned[1])) {
983 ret = btrfs_readpage(NULL, pinned[1]);
984 BUG_ON(ret);
985 wait_on_page_locked(pinned[1]);
986 } else {
987 unlock_page(pinned[1]);
988 }
989 }
990
991 while (iov_iter_count(&i) > 0) { 1013 while (iov_iter_count(&i) > 0) {
992 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
993 size_t write_bytes = min(iov_iter_count(&i), 1015 size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1024 1046
1025 copied = btrfs_copy_from_user(pos, num_pages, 1047 copied = btrfs_copy_from_user(pos, num_pages,
1026 write_bytes, pages, &i); 1048 write_bytes, pages, &i);
1027 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> 1049
1028 PAGE_CACHE_SHIFT; 1050 /*
1051 * if we have trouble faulting in the pages, fall
1052 * back to one page at a time
1053 */
1054 if (copied < write_bytes)
1055 nrptrs = 1;
1056
1057 if (copied == 0)
1058 dirty_pages = 0;
1059 else
1060 dirty_pages = (copied + offset +
1061 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT;
1029 1063
1030 if (num_pages > dirty_pages) { 1064 if (num_pages > dirty_pages) {
1031 if (copied > 0) 1065 if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
1069 err = ret; 1103 err = ret;
1070 1104
1071 kfree(pages); 1105 kfree(pages);
1072 if (pinned[0])
1073 page_cache_release(pinned[0]);
1074 if (pinned[1])
1075 page_cache_release(pinned[1]);
1076 *ppos = pos; 1106 *ppos = pos;
1077 1107
1078 /* 1108 /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb9bd7832b6d..4a0107e18747 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1913,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
1913 1913
1914 private = 0; 1914 private = 0;
1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, 1915 if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
1916 (u64)-1, 1, EXTENT_DIRTY)) { 1916 (u64)-1, 1, EXTENT_DIRTY, 0)) {
1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, 1917 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
1918 start, &private_failure); 1918 start, &private_failure);
1919 if (ret == 0) { 1919 if (ret == 0) {
@@ -4806,9 +4806,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 int err; 4806 int err;
4807 int drop_inode = 0; 4807 int drop_inode = 0;
4808 4808
4809 if (inode->i_nlink == 0)
4810 return -ENOENT;
4811
4812 /* do not allow sys_link's with other subvols of the same device */ 4809 /* do not allow sys_link's with other subvols of the same device */
4813 if (root->objectid != BTRFS_I(inode)->root->objectid) 4810 if (root->objectid != BTRFS_I(inode)->root->objectid)
4814 return -EPERM; 4811 return -EPERM;
@@ -4821,10 +4818,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4821 goto fail; 4818 goto fail;
4822 4819
4823 /* 4820 /*
4824 * 1 item for inode ref 4821 * 2 items for inode and inode ref
4825 * 2 items for dir items 4822 * 2 items for dir items
4823 * 1 item for parent inode
4826 */ 4824 */
4827 trans = btrfs_start_transaction(root, 3); 4825 trans = btrfs_start_transaction(root, 5);
4828 if (IS_ERR(trans)) { 4826 if (IS_ERR(trans)) {
4829 err = PTR_ERR(trans); 4827 err = PTR_ERR(trans);
4830 goto fail; 4828 goto fail;
@@ -5280,6 +5278,128 @@ out:
5280 return em; 5278 return em;
5281} 5279}
5282 5280
5281struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
5282 size_t pg_offset, u64 start, u64 len,
5283 int create)
5284{
5285 struct extent_map *em;
5286 struct extent_map *hole_em = NULL;
5287 u64 range_start = start;
5288 u64 end;
5289 u64 found;
5290 u64 found_end;
5291 int err = 0;
5292
5293 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
5294 if (IS_ERR(em))
5295 return em;
5296 if (em) {
5297 /*
5298 * if our em maps to a hole, there might
5299 * actually be delalloc bytes behind it
5300 */
5301 if (em->block_start != EXTENT_MAP_HOLE)
5302 return em;
5303 else
5304 hole_em = em;
5305 }
5306
5307 /* check to see if we've wrapped (len == -1 or similar) */
5308 end = start + len;
5309 if (end < start)
5310 end = (u64)-1;
5311 else
5312 end -= 1;
5313
5314 em = NULL;
5315
5316 /* ok, we didn't find anything, lets look for delalloc */
5317 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
5318 end, len, EXTENT_DELALLOC, 1);
5319 found_end = range_start + found;
5320 if (found_end < range_start)
5321 found_end = (u64)-1;
5322
5323 /*
5324 * we didn't find anything useful, return
5325 * the original results from get_extent()
5326 */
5327 if (range_start > end || found_end <= start) {
5328 em = hole_em;
5329 hole_em = NULL;
5330 goto out;
5331 }
5332
5333 /* adjust the range_start to make sure it doesn't
5334 * go backwards from the start they passed in
5335 */
5336 range_start = max(start,range_start);
5337 found = found_end - range_start;
5338
5339 if (found > 0) {
5340 u64 hole_start = start;
5341 u64 hole_len = len;
5342
5343 em = alloc_extent_map(GFP_NOFS);
5344 if (!em) {
5345 err = -ENOMEM;
5346 goto out;
5347 }
5348 /*
5349 * when btrfs_get_extent can't find anything it
5350 * returns one huge hole
5351 *
5352 * make sure what it found really fits our range, and
5353 * adjust to make sure it is based on the start from
5354 * the caller
5355 */
5356 if (hole_em) {
5357 u64 calc_end = extent_map_end(hole_em);
5358
5359 if (calc_end <= start || (hole_em->start > end)) {
5360 free_extent_map(hole_em);
5361 hole_em = NULL;
5362 } else {
5363 hole_start = max(hole_em->start, start);
5364 hole_len = calc_end - hole_start;
5365 }
5366 }
5367 em->bdev = NULL;
5368 if (hole_em && range_start > hole_start) {
5369 /* our hole starts before our delalloc, so we
5370 * have to return just the parts of the hole
5371 * that go until the delalloc starts
5372 */
5373 em->len = min(hole_len,
5374 range_start - hole_start);
5375 em->start = hole_start;
5376 em->orig_start = hole_start;
5377 /*
5378 * don't adjust block start at all,
5379 * it is fixed at EXTENT_MAP_HOLE
5380 */
5381 em->block_start = hole_em->block_start;
5382 em->block_len = hole_len;
5383 } else {
5384 em->start = range_start;
5385 em->len = found;
5386 em->orig_start = range_start;
5387 em->block_start = EXTENT_MAP_DELALLOC;
5388 em->block_len = found;
5389 }
5390 } else if (hole_em) {
5391 return hole_em;
5392 }
5393out:
5394
5395 free_extent_map(hole_em);
5396 if (err) {
5397 free_extent_map(em);
5398 return ERR_PTR(err);
5399 }
5400 return em;
5401}
5402
5283static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5403static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5284 u64 start, u64 len) 5404 u64 start, u64 len)
5285{ 5405{
@@ -5934,6 +6054,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5934 if (!skip_sum) { 6054 if (!skip_sum) {
5935 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); 6055 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
5936 if (!dip->csums) { 6056 if (!dip->csums) {
6057 kfree(dip);
5937 ret = -ENOMEM; 6058 ret = -ENOMEM;
5938 goto free_ordered; 6059 goto free_ordered;
5939 } 6060 }
@@ -6102,7 +6223,7 @@ out:
6102static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6223static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
6103 __u64 start, __u64 len) 6224 __u64 start, __u64 len)
6104{ 6225{
6105 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); 6226 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
6106} 6227}
6107 6228
6108int btrfs_readpage(struct file *file, struct page *page) 6229int btrfs_readpage(struct file *file, struct page *page)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6aaa5e..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1071 if (copy_from_user(&flags, arg, sizeof(flags))) 1071 if (copy_from_user(&flags, arg, sizeof(flags)))
1072 return -EFAULT; 1072 return -EFAULT;
1073 1073
1074 if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) 1074 if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
1075 return -EINVAL; 1075 return -EINVAL;
1076 1076
1077 if (flags & ~BTRFS_SUBVOL_RDONLY) 1077 if (flags & ~BTRFS_SUBVOL_RDONLY)
1078 return -EOPNOTSUPP; 1078 return -EOPNOTSUPP;
1079 1079
1080 if (!is_owner_or_cap(inode))
1081 return -EACCES;
1082
1080 down_write(&root->fs_info->subvol_sem); 1083 down_write(&root->fs_info->subvol_sem);
1081 1084
1082 /* nothing to do */ 1085 /* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1097 goto out_reset; 1100 goto out_reset;
1098 } 1101 }
1099 1102
1100 ret = btrfs_update_root(trans, root, 1103 ret = btrfs_update_root(trans, root->fs_info->tree_root,
1101 &root->root_key, &root->root_item); 1104 &root->root_key, &root->root_item);
1102 1105
1103 btrfs_commit_transaction(trans, root); 1106 btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
280 unsigned long tot_out; 280 unsigned long tot_out;
281 unsigned long tot_len; 281 unsigned long tot_len;
282 char *buf; 282 char *buf;
283 bool may_late_unmap, need_unmap;
283 284
284 data_in = kmap(pages_in[0]); 285 data_in = kmap(pages_in[0]);
285 tot_len = read_compress_length(data_in); 286 tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
300 301
301 tot_in += in_len; 302 tot_in += in_len;
302 working_bytes = in_len; 303 working_bytes = in_len;
304 may_late_unmap = need_unmap = false;
303 305
304 /* fast path: avoid using the working buffer */ 306 /* fast path: avoid using the working buffer */
305 if (in_page_bytes_left >= in_len) { 307 if (in_page_bytes_left >= in_len) {
306 buf = data_in + in_offset; 308 buf = data_in + in_offset;
307 bytes = in_len; 309 bytes = in_len;
310 may_late_unmap = true;
308 goto cont; 311 goto cont;
309 } 312 }
310 313
@@ -329,14 +332,17 @@ cont:
329 if (working_bytes == 0 && tot_in >= tot_len) 332 if (working_bytes == 0 && tot_in >= tot_len)
330 break; 333 break;
331 334
332 kunmap(pages_in[page_in_index]); 335 if (page_in_index + 1 >= total_pages_in) {
333 page_in_index++;
334 if (page_in_index >= total_pages_in) {
335 ret = -1; 336 ret = -1;
336 data_in = NULL;
337 goto done; 337 goto done;
338 } 338 }
339 data_in = kmap(pages_in[page_in_index]); 339
340 if (may_late_unmap)
341 need_unmap = true;
342 else
343 kunmap(pages_in[page_in_index]);
344
345 data_in = kmap(pages_in[++page_in_index]);
340 346
341 in_page_bytes_left = PAGE_CACHE_SIZE; 347 in_page_bytes_left = PAGE_CACHE_SIZE;
342 in_offset = 0; 348 in_offset = 0;
@@ -346,6 +352,8 @@ cont:
346 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); 352 out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
347 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, 353 ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
348 &out_len); 354 &out_len);
355 if (need_unmap)
356 kunmap(pages_in[page_in_index - 1]);
349 if (ret != LZO_E_OK) { 357 if (ret != LZO_E_OK) {
350 printk(KERN_WARNING "btrfs decompress failed\n"); 358 printk(KERN_WARNING "btrfs decompress failed\n");
351 ret = -1; 359 ret = -1;
@@ -363,8 +371,7 @@ cont:
363 break; 371 break;
364 } 372 }
365done: 373done:
366 if (data_in) 374 kunmap(pages_in[page_in_index]);
367 kunmap(pages_in[page_in_index]);
368 return ret; 375 return ret;
369} 376}
370 377
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4ed9447..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3654 u32 item_size; 3654 u32 item_size;
3655 int ret; 3655 int ret;
3656 int err = 0; 3656 int err = 0;
3657 int progress = 0;
3657 3658
3658 path = btrfs_alloc_path(); 3659 path = btrfs_alloc_path();
3659 if (!path) 3660 if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3666 } 3667 }
3667 3668
3668 while (1) { 3669 while (1) {
3670 progress++;
3669 trans = btrfs_start_transaction(rc->extent_root, 0); 3671 trans = btrfs_start_transaction(rc->extent_root, 0);
3670 BUG_ON(IS_ERR(trans)); 3672 BUG_ON(IS_ERR(trans));
3671 3673restart:
3672 if (update_backref_cache(trans, &rc->backref_cache)) { 3674 if (update_backref_cache(trans, &rc->backref_cache)) {
3673 btrfs_end_transaction(trans, rc->extent_root); 3675 btrfs_end_transaction(trans, rc->extent_root);
3674 continue; 3676 continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3781 } 3783 }
3782 } 3784 }
3783 } 3785 }
3786 if (trans && progress && err == -ENOSPC) {
3787 ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3788 rc->block_group->flags);
3789 if (ret == 0) {
3790 err = 0;
3791 progress = 0;
3792 goto restart;
3793 }
3794 }
3784 3795
3785 btrfs_release_path(rc->extent_root, path); 3796 btrfs_release_path(rc->extent_root, path);
3786 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3797 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d28..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 155 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 156 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 157 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, 158 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
159 Opt_enospc_debug, Opt_err,
159}; 160};
160 161
161static match_table_t tokens = { 162static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
184 {Opt_space_cache, "space_cache"}, 185 {Opt_space_cache, "space_cache"},
185 {Opt_clear_cache, "clear_cache"}, 186 {Opt_clear_cache, "clear_cache"},
186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 187 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
188 {Opt_enospc_debug, "enospc_debug"},
187 {Opt_err, NULL}, 189 {Opt_err, NULL},
188}; 190};
189 191
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
358 case Opt_user_subvol_rm_allowed: 360 case Opt_user_subvol_rm_allowed:
359 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 361 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
360 break; 362 break;
363 case Opt_enospc_debug:
364 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
365 break;
361 case Opt_err: 366 case Opt_err:
362 printk(KERN_INFO "btrfs: unrecognized mount option " 367 printk(KERN_INFO "btrfs: unrecognized mount option "
363 "'%s'\n", p); 368 "'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index af7dbca15276..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1338 1338
1339 ret = btrfs_shrink_device(device, 0); 1339 ret = btrfs_shrink_device(device, 0);
1340 if (ret) 1340 if (ret)
1341 goto error_brelse; 1341 goto error_undo;
1342 1342
1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1343 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1344 if (ret) 1344 if (ret)
1345 goto error_brelse; 1345 goto error_undo;
1346 1346
1347 device->in_fs_metadata = 0; 1347 device->in_fs_metadata = 0;
1348 1348
@@ -1416,6 +1416,13 @@ out:
1416 mutex_unlock(&root->fs_info->volume_mutex); 1416 mutex_unlock(&root->fs_info->volume_mutex);
1417 mutex_unlock(&uuid_mutex); 1417 mutex_unlock(&uuid_mutex);
1418 return ret; 1418 return ret;
1419error_undo:
1420 if (device->writeable) {
1421 list_add(&device->dev_alloc_list,
1422 &root->fs_info->fs_devices->alloc_list);
1423 root->fs_info->fs_devices->rw_devices++;
1424 }
1425 goto error_brelse;
1419} 1426}
1420 1427
1421/* 1428/*
@@ -1633,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1633 device->dev_root = root->fs_info->dev_root; 1640 device->dev_root = root->fs_info->dev_root;
1634 device->bdev = bdev; 1641 device->bdev = bdev;
1635 device->in_fs_metadata = 1; 1642 device->in_fs_metadata = 1;
1636 device->mode = 0; 1643 device->mode = FMODE_EXCL;
1637 set_blocksize(device->bdev, 4096); 1644 set_blocksize(device->bdev, 4096);
1638 1645
1639 if (seeding_dev) { 1646 if (seeding_dev) {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f0aef787a102..ebafa65a29b6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -60,7 +60,6 @@ int ceph_init_dentry(struct dentry *dentry)
60 } 60 }
61 di->dentry = dentry; 61 di->dentry = dentry;
62 di->lease_session = NULL; 62 di->lease_session = NULL;
63 di->parent_inode = igrab(dentry->d_parent->d_inode);
64 dentry->d_fsdata = di; 63 dentry->d_fsdata = di;
65 dentry->d_time = jiffies; 64 dentry->d_time = jiffies;
66 ceph_dentry_lru_add(dentry); 65 ceph_dentry_lru_add(dentry);
@@ -410,7 +409,7 @@ more:
410 spin_lock(&inode->i_lock); 409 spin_lock(&inode->i_lock);
411 if (ci->i_release_count == fi->dir_release_count) { 410 if (ci->i_release_count == fi->dir_release_count) {
412 dout(" marking %p complete\n", inode); 411 dout(" marking %p complete\n", inode);
413 ci->i_ceph_flags |= CEPH_I_COMPLETE; 412 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
414 ci->i_max_offset = filp->f_pos; 413 ci->i_max_offset = filp->f_pos;
415 } 414 }
416 spin_unlock(&inode->i_lock); 415 spin_unlock(&inode->i_lock);
@@ -497,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
497 496
498 /* .snap dir? */ 497 /* .snap dir? */
499 if (err == -ENOENT && 498 if (err == -ENOENT &&
499 ceph_snap(parent) == CEPH_NOSNAP &&
500 strcmp(dentry->d_name.name, 500 strcmp(dentry->d_name.name,
501 fsc->mount_options->snapdir_name) == 0) { 501 fsc->mount_options->snapdir_name) == 0) {
502 struct inode *inode = ceph_get_snapdir(parent); 502 struct inode *inode = ceph_get_snapdir(parent);
@@ -993,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
993{ 993{
994 struct inode *dir; 994 struct inode *dir;
995 995
996 if (nd->flags & LOOKUP_RCU) 996 if (nd && nd->flags & LOOKUP_RCU)
997 return -ECHILD; 997 return -ECHILD;
998 998
999 dir = dentry->d_parent->d_inode; 999 dir = dentry->d_parent->d_inode;
@@ -1030,28 +1030,8 @@ out_touch:
1030static void ceph_dentry_release(struct dentry *dentry) 1030static void ceph_dentry_release(struct dentry *dentry)
1031{ 1031{
1032 struct ceph_dentry_info *di = ceph_dentry(dentry); 1032 struct ceph_dentry_info *di = ceph_dentry(dentry);
1033 struct inode *parent_inode = NULL;
1034 u64 snapid = CEPH_NOSNAP;
1035 1033
1036 if (!IS_ROOT(dentry)) { 1034 dout("dentry_release %p\n", dentry);
1037 parent_inode = di->parent_inode;
1038 if (parent_inode)
1039 snapid = ceph_snap(parent_inode);
1040 }
1041 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1042 if (parent_inode && snapid != CEPH_SNAPDIR) {
1043 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1044
1045 spin_lock(&parent_inode->i_lock);
1046 if (ci->i_shared_gen == di->lease_shared_gen ||
1047 snapid <= CEPH_MAXSNAP) {
1048 dout(" clearing %p complete (d_release)\n",
1049 parent_inode);
1050 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1051 ci->i_release_count++;
1052 }
1053 spin_unlock(&parent_inode->i_lock);
1054 }
1055 if (di) { 1035 if (di) {
1056 ceph_dentry_lru_del(dentry); 1036 ceph_dentry_lru_del(dentry);
1057 if (di->lease_session) 1037 if (di->lease_session)
@@ -1059,8 +1039,6 @@ static void ceph_dentry_release(struct dentry *dentry)
1059 kmem_cache_free(ceph_dentry_cachep, di); 1039 kmem_cache_free(ceph_dentry_cachep, di);
1060 dentry->d_fsdata = NULL; 1040 dentry->d_fsdata = NULL;
1061 } 1041 }
1062 if (parent_inode)
1063 iput(parent_inode);
1064} 1042}
1065 1043
1066static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1044static int ceph_snapdir_d_revalidate(struct dentry *dentry,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa479..193bfa5e9cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode,
707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 707 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
709 dout(" marking %p complete (empty)\n", inode); 709 dout(" marking %p complete (empty)\n", inode);
710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 710 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
711 ci->i_max_offset = 2; 711 ci->i_max_offset = 2;
712 } 712 }
713 break; 713 break;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88fcaa21b801..20b907d76ae2 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -207,7 +207,6 @@ struct ceph_dentry_info {
207 struct dentry *dentry; 207 struct dentry *dentry;
208 u64 time; 208 u64 time;
209 u64 offset; 209 u64 offset;
210 struct inode *parent_inode;
211}; 210};
212 211
213struct ceph_inode_xattrs_info { 212struct ceph_inode_xattrs_info {
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6cc..c6d31a3bab88 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
262 */ 262 */
263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) 263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
264{ 264{
265 struct path path; 265 struct kstatfs tmp;
266 int error; 266 int error = user_statfs(pathname, &tmp);
267 267 if (!error)
268 error = user_path(pathname, &path); 268 error = put_compat_statfs(buf, &tmp);
269 if (!error) {
270 struct kstatfs tmp;
271 error = vfs_statfs(&path, &tmp);
272 if (!error)
273 error = put_compat_statfs(buf, &tmp);
274 path_put(&path);
275 }
276 return error; 269 return error;
277} 270}
278 271
279asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) 272asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
280{ 273{
281 struct file * file;
282 struct kstatfs tmp; 274 struct kstatfs tmp;
283 int error; 275 int error = fd_statfs(fd, &tmp);
284
285 error = -EBADF;
286 file = fget(fd);
287 if (!file)
288 goto out;
289 error = vfs_statfs(&file->f_path, &tmp);
290 if (!error) 276 if (!error)
291 error = put_compat_statfs(buf, &tmp); 277 error = put_compat_statfs(buf, &tmp);
292 fput(file);
293out:
294 return error; 278 return error;
295} 279}
296 280
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
329 313
330asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) 314asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
331{ 315{
332 struct path path; 316 struct kstatfs tmp;
333 int error; 317 int error;
334 318
335 if (sz != sizeof(*buf)) 319 if (sz != sizeof(*buf))
336 return -EINVAL; 320 return -EINVAL;
337 321
338 error = user_path(pathname, &path); 322 error = user_statfs(pathname, &tmp);
339 if (!error) { 323 if (!error)
340 struct kstatfs tmp; 324 error = put_compat_statfs64(buf, &tmp);
341 error = vfs_statfs(&path, &tmp);
342 if (!error)
343 error = put_compat_statfs64(buf, &tmp);
344 path_put(&path);
345 }
346 return error; 325 return error;
347} 326}
348 327
349asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 328asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
350{ 329{
351 struct file * file;
352 struct kstatfs tmp; 330 struct kstatfs tmp;
353 int error; 331 int error;
354 332
355 if (sz != sizeof(*buf)) 333 if (sz != sizeof(*buf))
356 return -EINVAL; 334 return -EINVAL;
357 335
358 error = -EBADF; 336 error = fd_statfs(fd, &tmp);
359 file = fget(fd);
360 if (!file)
361 goto out;
362 error = vfs_statfs(&file->f_path, &tmp);
363 if (!error) 337 if (!error)
364 error = put_compat_statfs64(buf, &tmp); 338 error = put_compat_statfs64(buf, &tmp);
365 fput(file);
366out:
367 return error; 339 return error;
368} 340}
369 341
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1228 file = fget_light(fd, &fput_needed); 1200 file = fget_light(fd, &fput_needed);
1229 if (!file) 1201 if (!file)
1230 return -EBADF; 1202 return -EBADF;
1231 ret = compat_readv(file, vec, vlen, &pos); 1203 ret = -ESPIPE;
1204 if (file->f_mode & FMODE_PREAD)
1205 ret = compat_readv(file, vec, vlen, &pos);
1232 fput_light(file, fput_needed); 1206 fput_light(file, fput_needed);
1233 return ret; 1207 return ret;
1234} 1208}
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1285 file = fget_light(fd, &fput_needed); 1259 file = fget_light(fd, &fput_needed);
1286 if (!file) 1260 if (!file)
1287 return -EBADF; 1261 return -EBADF;
1288 ret = compat_writev(file, vec, vlen, &pos); 1262 ret = -ESPIPE;
1263 if (file->f_mode & FMODE_PWRITE)
1264 ret = compat_writev(file, vec, vlen, &pos);
1289 fput_light(file, fput_needed); 1265 fput_light(file, fput_needed);
1290 return ret; 1266 return ret;
1291} 1267}
@@ -2308,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
2308} 2284}
2309 2285
2310#endif /* CONFIG_TIMERFD */ 2286#endif /* CONFIG_TIMERFD */
2287
2288#ifdef CONFIG_FHANDLE
2289/*
2290 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
2291 * doesn't set the O_LARGEFILE flag.
2292 */
2293asmlinkage long
2294compat_sys_open_by_handle_at(int mountdirfd,
2295 struct file_handle __user *handle, int flags)
2296{
2297 return do_handle_open(mountdirfd, handle, flags);
2298}
2299#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae97..a39fe47c466f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
296 __releases(parent->d_lock) 296 __releases(parent->d_lock)
297 __releases(dentry->d_inode->i_lock) 297 __releases(dentry->d_inode->i_lock)
298{ 298{
299 dentry->d_parent = NULL;
300 list_del(&dentry->d_u.d_child); 299 list_del(&dentry->d_u.d_child);
300 /*
301 * Inform try_to_ascend() that we are no longer attached to the
302 * dentry tree
303 */
304 dentry->d_flags |= DCACHE_DISCONNECTED;
301 if (parent) 305 if (parent)
302 spin_unlock(&parent->d_lock); 306 spin_unlock(&parent->d_lock);
303 dentry_iput(dentry); 307 dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
1012} 1016}
1013 1017
1014/* 1018/*
1019 * This tries to ascend one level of parenthood, but
1020 * we can race with renaming, so we need to re-check
1021 * the parenthood after dropping the lock and check
1022 * that the sequence number still matches.
1023 */
1024static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1025{
1026 struct dentry *new = old->d_parent;
1027
1028 rcu_read_lock();
1029 spin_unlock(&old->d_lock);
1030 spin_lock(&new->d_lock);
1031
1032 /*
1033 * might go back up the wrong parent if we have had a rename
1034 * or deletion
1035 */
1036 if (new != old->d_parent ||
1037 (old->d_flags & DCACHE_DISCONNECTED) ||
1038 (!locked && read_seqretry(&rename_lock, seq))) {
1039 spin_unlock(&new->d_lock);
1040 new = NULL;
1041 }
1042 rcu_read_unlock();
1043 return new;
1044}
1045
1046
1047/*
1015 * Search for at least 1 mount point in the dentry's subdirs. 1048 * Search for at least 1 mount point in the dentry's subdirs.
1016 * We descend to the next level whenever the d_subdirs 1049 * We descend to the next level whenever the d_subdirs
1017 * list is non-empty and continue searching. 1050 * list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
1066 * All done at this level ... ascend and resume the search. 1099 * All done at this level ... ascend and resume the search.
1067 */ 1100 */
1068 if (this_parent != parent) { 1101 if (this_parent != parent) {
1069 struct dentry *tmp; 1102 struct dentry *child = this_parent;
1070 struct dentry *child; 1103 this_parent = try_to_ascend(this_parent, locked, seq);
1071 1104 if (!this_parent)
1072 tmp = this_parent->d_parent;
1073 rcu_read_lock();
1074 spin_unlock(&this_parent->d_lock);
1075 child = this_parent;
1076 this_parent = tmp;
1077 spin_lock(&this_parent->d_lock);
1078 /* might go back up the wrong parent if we have had a rename
1079 * or deletion */
1080 if (this_parent != child->d_parent ||
1081 (!locked && read_seqretry(&rename_lock, seq))) {
1082 spin_unlock(&this_parent->d_lock);
1083 rcu_read_unlock();
1084 goto rename_retry; 1105 goto rename_retry;
1085 }
1086 rcu_read_unlock();
1087 next = child->d_u.d_child.next; 1106 next = child->d_u.d_child.next;
1088 goto resume; 1107 goto resume;
1089 } 1108 }
@@ -1181,24 +1200,10 @@ resume:
1181 * All done at this level ... ascend and resume the search. 1200 * All done at this level ... ascend and resume the search.
1182 */ 1201 */
1183 if (this_parent != parent) { 1202 if (this_parent != parent) {
1184 struct dentry *tmp; 1203 struct dentry *child = this_parent;
1185 struct dentry *child; 1204 this_parent = try_to_ascend(this_parent, locked, seq);
1186 1205 if (!this_parent)
1187 tmp = this_parent->d_parent;
1188 rcu_read_lock();
1189 spin_unlock(&this_parent->d_lock);
1190 child = this_parent;
1191 this_parent = tmp;
1192 spin_lock(&this_parent->d_lock);
1193 /* might go back up the wrong parent if we have had a rename
1194 * or deletion */
1195 if (this_parent != child->d_parent ||
1196 (!locked && read_seqretry(&rename_lock, seq))) {
1197 spin_unlock(&this_parent->d_lock);
1198 rcu_read_unlock();
1199 goto rename_retry; 1206 goto rename_retry;
1200 }
1201 rcu_read_unlock();
1202 next = child->d_u.d_child.next; 1207 next = child->d_u.d_child.next;
1203 goto resume; 1208 goto resume;
1204 } 1209 }
@@ -1523,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1523} 1528}
1524EXPORT_SYMBOL(d_alloc_root); 1529EXPORT_SYMBOL(d_alloc_root);
1525 1530
1531static struct dentry * __d_find_any_alias(struct inode *inode)
1532{
1533 struct dentry *alias;
1534
1535 if (list_empty(&inode->i_dentry))
1536 return NULL;
1537 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1538 __dget(alias);
1539 return alias;
1540}
1541
1542static struct dentry * d_find_any_alias(struct inode *inode)
1543{
1544 struct dentry *de;
1545
1546 spin_lock(&inode->i_lock);
1547 de = __d_find_any_alias(inode);
1548 spin_unlock(&inode->i_lock);
1549 return de;
1550}
1551
1552
1526/** 1553/**
1527 * d_obtain_alias - find or allocate a dentry for a given inode 1554 * d_obtain_alias - find or allocate a dentry for a given inode
1528 * @inode: inode to allocate the dentry for 1555 * @inode: inode to allocate the dentry for
@@ -1552,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1552 if (IS_ERR(inode)) 1579 if (IS_ERR(inode))
1553 return ERR_CAST(inode); 1580 return ERR_CAST(inode);
1554 1581
1555 res = d_find_alias(inode); 1582 res = d_find_any_alias(inode);
1556 if (res) 1583 if (res)
1557 goto out_iput; 1584 goto out_iput;
1558 1585
@@ -1565,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1565 1592
1566 1593
1567 spin_lock(&inode->i_lock); 1594 spin_lock(&inode->i_lock);
1568 res = __d_find_alias(inode, 0); 1595 res = __d_find_any_alias(inode);
1569 if (res) { 1596 if (res) {
1570 spin_unlock(&inode->i_lock); 1597 spin_unlock(&inode->i_lock);
1571 dput(tmp); 1598 dput(tmp);
@@ -2920,28 +2947,14 @@ resume:
2920 spin_unlock(&dentry->d_lock); 2947 spin_unlock(&dentry->d_lock);
2921 } 2948 }
2922 if (this_parent != root) { 2949 if (this_parent != root) {
2923 struct dentry *tmp; 2950 struct dentry *child = this_parent;
2924 struct dentry *child;
2925
2926 tmp = this_parent->d_parent;
2927 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { 2951 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2928 this_parent->d_flags |= DCACHE_GENOCIDE; 2952 this_parent->d_flags |= DCACHE_GENOCIDE;
2929 this_parent->d_count--; 2953 this_parent->d_count--;
2930 } 2954 }
2931 rcu_read_lock(); 2955 this_parent = try_to_ascend(this_parent, locked, seq);
2932 spin_unlock(&this_parent->d_lock); 2956 if (!this_parent)
2933 child = this_parent;
2934 this_parent = tmp;
2935 spin_lock(&this_parent->d_lock);
2936 /* might go back up the wrong parent if we have had a rename
2937 * or deletion */
2938 if (this_parent != child->d_parent ||
2939 (!locked && read_seqretry(&rename_lock, seq))) {
2940 spin_unlock(&this_parent->d_lock);
2941 rcu_read_unlock();
2942 goto rename_retry; 2957 goto rename_retry;
2943 }
2944 rcu_read_unlock();
2945 next = child->d_u.d_child.next; 2958 next = child->d_u.d_child.next;
2946 goto resume; 2959 goto resume;
2947 } 2960 }
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada4541..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
63 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
64 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
66 * It is also acquired when inserting an epoll fd onto another epoll
67 * fd. We do this so that we walk the epoll tree and ensure that this
68 * insertion does not create a cycle of epoll file descriptors, which
69 * could lead to deadlock. We need a global mutex to prevent two
70 * simultaneous inserts (A into B and B into A) from racing and
71 * constructing a cycle without either insert observing that it is
72 * going to.
66 * It is possible to drop the "ep->mtx" and to use the global 73 * It is possible to drop the "ep->mtx" and to use the global
67 * mutex "epmutex" (together with "ep->lock") to have it working, 74 * mutex "epmutex" (together with "ep->lock") to have it working,
68 * but having "ep->mtx" will make the interface more scalable. 75 * but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
224 */ 231 */
225static DEFINE_MUTEX(epmutex); 232static DEFINE_MUTEX(epmutex);
226 233
234/* Used to check for epoll file descriptor inclusion loops */
235static struct nested_calls poll_loop_ncalls;
236
227/* Used for safe wake up implementation */ 237/* Used for safe wake up implementation */
228static struct nested_calls poll_safewake_ncalls; 238static struct nested_calls poll_safewake_ncalls;
229 239
@@ -1198,6 +1208,62 @@ retry:
1198 return res; 1208 return res;
1199} 1209}
1200 1210
1211/**
1212 * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
1213 * API, to verify that adding an epoll file inside another
1214 * epoll structure, does not violate the constraints, in
1215 * terms of closed loops, or too deep chains (which can
1216 * result in excessive stack usage).
1217 *
1218 * @priv: Pointer to the epoll file to be currently checked.
1219 * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
1220 * data structure pointer.
1221 * @call_nests: Current dept of the @ep_call_nested() call stack.
1222 *
1223 * Returns: Returns zero if adding the epoll @file inside current epoll
1224 * structure @ep does not violate the constraints, or -1 otherwise.
1225 */
1226static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1227{
1228 int error = 0;
1229 struct file *file = priv;
1230 struct eventpoll *ep = file->private_data;
1231 struct rb_node *rbp;
1232 struct epitem *epi;
1233
1234 mutex_lock(&ep->mtx);
1235 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1236 epi = rb_entry(rbp, struct epitem, rbn);
1237 if (unlikely(is_file_epoll(epi->ffd.file))) {
1238 error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1239 ep_loop_check_proc, epi->ffd.file,
1240 epi->ffd.file->private_data, current);
1241 if (error != 0)
1242 break;
1243 }
1244 }
1245 mutex_unlock(&ep->mtx);
1246
1247 return error;
1248}
1249
1250/**
1251 * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
1252 * another epoll file (represented by @ep) does not create
1253 * closed loops or too deep chains.
1254 *
1255 * @ep: Pointer to the epoll private data structure.
1256 * @file: Pointer to the epoll file to be checked.
1257 *
1258 * Returns: Returns zero if adding the epoll @file inside current epoll
1259 * structure @ep does not violate the constraints, or -1 otherwise.
1260 */
1261static int ep_loop_check(struct eventpoll *ep, struct file *file)
1262{
1263 return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
1264 ep_loop_check_proc, file, ep, current);
1265}
1266
1201/* 1267/*
1202 * Open an eventpoll file descriptor. 1268 * Open an eventpoll file descriptor.
1203 */ 1269 */
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1246 struct epoll_event __user *, event) 1312 struct epoll_event __user *, event)
1247{ 1313{
1248 int error; 1314 int error;
1315 int did_lock_epmutex = 0;
1249 struct file *file, *tfile; 1316 struct file *file, *tfile;
1250 struct eventpoll *ep; 1317 struct eventpoll *ep;
1251 struct epitem *epi; 1318 struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1287 */ 1354 */
1288 ep = file->private_data; 1355 ep = file->private_data;
1289 1356
1357 /*
1358 * When we insert an epoll file descriptor, inside another epoll file
1359 * descriptor, there is the change of creating closed loops, which are
1360 * better be handled here, than in more critical paths.
1361 *
1362 * We hold epmutex across the loop check and the insert in this case, in
1363 * order to prevent two separate inserts from racing and each doing the
1364 * insert "at the same time" such that ep_loop_check passes on both
1365 * before either one does the insert, thereby creating a cycle.
1366 */
1367 if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
1368 mutex_lock(&epmutex);
1369 did_lock_epmutex = 1;
1370 error = -ELOOP;
1371 if (ep_loop_check(ep, tfile) != 0)
1372 goto error_tgt_fput;
1373 }
1374
1375
1290 mutex_lock(&ep->mtx); 1376 mutex_lock(&ep->mtx);
1291 1377
1292 /* 1378 /*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1322 mutex_unlock(&ep->mtx); 1408 mutex_unlock(&ep->mtx);
1323 1409
1324error_tgt_fput: 1410error_tgt_fput:
1411 if (unlikely(did_lock_epmutex))
1412 mutex_unlock(&epmutex);
1413
1325 fput(tfile); 1414 fput(tfile);
1326error_fput: 1415error_fput:
1327 fput(file); 1416 fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
1441 EP_ITEM_COST; 1530 EP_ITEM_COST;
1442 BUG_ON(max_user_watches < 0); 1531 BUG_ON(max_user_watches < 0);
1443 1532
1533 /*
1534 * Initialize the structure used to perform epoll file descriptor
1535 * inclusion loops checks.
1536 */
1537 ep_nested_calls_init(&poll_loop_ncalls);
1538
1444 /* Initialize the structure used to perform safe poll wait head wake ups */ 1539 /* Initialize the structure used to perform safe poll wait head wake ups */
1445 ep_nested_calls_init(&poll_safewake_ncalls); 1540 ep_nested_calls_init(&poll_safewake_ncalls);
1446 1541
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6ab..ba99e1abb1aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
115 struct file *file; 115 struct file *file;
116 char *tmp = getname(library); 116 char *tmp = getname(library);
117 int error = PTR_ERR(tmp); 117 int error = PTR_ERR(tmp);
118 static const struct open_flags uselib_flags = {
119 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
120 .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
121 .intent = LOOKUP_OPEN
122 };
118 123
119 if (IS_ERR(tmp)) 124 if (IS_ERR(tmp))
120 goto out; 125 goto out;
121 126
122 file = do_filp_open(AT_FDCWD, tmp, 127 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 128 putname(tmp);
126 error = PTR_ERR(file); 129 error = PTR_ERR(file);
127 if (IS_ERR(file)) 130 if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
721{ 724{
722 struct file *file; 725 struct file *file;
723 int err; 726 int err;
727 static const struct open_flags open_exec_flags = {
728 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
729 .acc_mode = MAY_EXEC | MAY_OPEN,
730 .intent = LOOKUP_OPEN
731 };
724 732
725 file = do_filp_open(AT_FDCWD, name, 733 file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 734 if (IS_ERR(file))
729 goto out; 735 goto out;
730 736
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page); 272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
273 if (!new_de) 273 if (!new_de)
274 goto out_dir; 274 goto out_dir;
275 inode_inc_link_count(old_inode);
276 err = exofs_set_link(new_dir, new_de, new_page, old_inode); 275 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
277 new_inode->i_ctime = CURRENT_TIME; 276 new_inode->i_ctime = CURRENT_TIME;
278 if (dir_de) 277 if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
286 if (new_dir->i_nlink >= EXOFS_LINK_MAX) 285 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
287 goto out_dir; 286 goto out_dir;
288 } 287 }
289 inode_inc_link_count(old_inode);
290 err = exofs_add_link(new_dentry, old_inode); 288 err = exofs_add_link(new_dentry, old_inode);
291 if (err) { 289 if (err)
292 inode_dec_link_count(old_inode);
293 goto out_dir; 290 goto out_dir;
294 }
295 if (dir_de) 291 if (dir_de)
296 inode_inc_link_count(new_dir); 292 inode_inc_link_count(new_dir);
297 } 293 }
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
299 old_inode->i_ctime = CURRENT_TIME; 295 old_inode->i_ctime = CURRENT_TIME;
300 296
301 exofs_delete_entry(old_de, old_page); 297 exofs_delete_entry(old_de, old_page);
302 inode_dec_link_count(old_inode); 298 mark_inode_dirty(old_inode);
303 299
304 if (dir_de) { 300 if (dir_de) {
305 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir); 301 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd5..b05acb796135 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
320 struct inode * inode = dentry->d_inode; 320 struct inode * inode = dentry->d_inode;
321 int len = *max_len; 321 int len = *max_len;
322 int type = FILEID_INO32_GEN; 322 int type = FILEID_INO32_GEN;
323 323
324 if (len < 2 || (connectable && len < 4)) 324 if (connectable && (len < 4)) {
325 *max_len = 4;
326 return 255;
327 } else if (len < 2) {
328 *max_len = 2;
325 return 255; 329 return 255;
330 }
326 331
327 len = 2; 332 len = 2;
328 fid->i32.ino = inode->i_ino; 333 fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
369 /* 374 /*
370 * Try to get any dentry for the given file handle from the filesystem. 375 * Try to get any dentry for the given file handle from the filesystem.
371 */ 376 */
377 if (!nop || !nop->fh_to_dentry)
378 return ERR_PTR(-ESTALE);
372 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 379 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
373 if (!result) 380 if (!result)
374 result = ERR_PTR(-ESTALE); 381 result = ERR_PTR(-ESTALE);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..adb91855ccd0 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); 344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
345 if (!new_de) 345 if (!new_de)
346 goto out_dir; 346 goto out_dir;
347 inode_inc_link_count(old_inode);
348 ext2_set_link(new_dir, new_de, new_page, old_inode, 1); 347 ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
349 new_inode->i_ctime = CURRENT_TIME_SEC; 348 new_inode->i_ctime = CURRENT_TIME_SEC;
350 if (dir_de) 349 if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
356 if (new_dir->i_nlink >= EXT2_LINK_MAX) 355 if (new_dir->i_nlink >= EXT2_LINK_MAX)
357 goto out_dir; 356 goto out_dir;
358 } 357 }
359 inode_inc_link_count(old_inode);
360 err = ext2_add_link(new_dentry, old_inode); 358 err = ext2_add_link(new_dentry, old_inode);
361 if (err) { 359 if (err)
362 inode_dec_link_count(old_inode);
363 goto out_dir; 360 goto out_dir;
364 }
365 if (dir_de) 361 if (dir_de)
366 inode_inc_link_count(new_dir); 362 inode_inc_link_count(new_dir);
367 } 363 }
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
369 /* 365 /*
370 * Like most other Unix systems, set the ctime for inodes on a 366 * Like most other Unix systems, set the ctime for inodes on a
371 * rename. 367 * rename.
372 * inode_dec_link_count() will mark the inode dirty.
373 */ 368 */
374 old_inode->i_ctime = CURRENT_TIME_SEC; 369 old_inode->i_ctime = CURRENT_TIME_SEC;
370 mark_inode_dirty(old_inode);
375 371
376 ext2_delete_entry (old_de, old_page); 372 ext2_delete_entry (old_de, old_page);
377 inode_dec_link_count(old_inode);
378 373
379 if (dir_de) { 374 if (dir_de) {
380 if (old_dir != new_dir) 375 if (old_dir != new_dir)
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810ec..561f69256266 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
2253 2253
2254 dquot_initialize(dir); 2254 dquot_initialize(dir);
2255 2255
2256 /*
2257 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2258 * otherwise has the potential to corrupt the orphan inode list.
2259 */
2260 if (inode->i_nlink == 0)
2261 return -ENOENT;
2262
2263retry: 2256retry:
2264 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2265 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f2473..9cc19a1dea8e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1936 sb->s_qcop = &ext3_qctl_operations; 1936 sb->s_qcop = &ext3_qctl_operations;
1937 sb->dq_op = &ext3_quota_operations; 1937 sb->dq_op = &ext3_quota_operations;
1938#endif 1938#endif
1939 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1939 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1940 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1940 mutex_init(&sbi->s_orphan_lock); 1941 mutex_init(&sbi->s_orphan_lock);
1941 mutex_init(&sbi->s_resize_lock); 1942 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c5..e781b7ea5630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
2304 2304
2305 dquot_initialize(dir); 2305 dquot_initialize(dir);
2306 2306
2307 /*
2308 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2309 * otherwise has the potential to corrupt the orphan inode list.
2310 */
2311 if (inode->i_nlink == 0)
2312 return -ENOENT;
2313
2314retry: 2307retry:
2315 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2316 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b2..5977b356a435 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3415 sb->s_qcop = &ext4_qctl_operations; 3415 sb->s_qcop = &ext4_qctl_operations;
3416 sb->dq_op = &ext4_quota_operations; 3416 sb->dq_op = &ext4_quota_operations;
3417#endif 3417#endif
3418 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3419
3418 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3419 mutex_init(&sbi->s_orphan_lock); 3421 mutex_init(&sbi->s_orphan_lock);
3420 mutex_init(&sbi->s_resize_lock); 3422 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd1..0e277ec4b612 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
757 struct inode *inode = de->d_inode; 757 struct inode *inode = de->d_inode;
758 u32 ipos_h, ipos_m, ipos_l; 758 u32 ipos_h, ipos_m, ipos_l;
759 759
760 if (len < 5) 760 if (len < 5) {
761 *lenp = 5;
761 return 255; /* no room */ 762 return 255; /* no room */
763 }
762 764
763 ipos_h = MSDOS_I(inode)->i_pos >> 8; 765 ipos_h = MSDOS_I(inode)->i_pos >> 8;
764 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd9..adae3fb7451a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU) 46 if (nd && nd->flags & LOOKUP_RCU)
47 return -ECHILD; 47 return -ECHILD;
48 48
49 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
54 54
55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
56{ 56{
57 if (nd->flags & LOOKUP_RCU) 57 if (nd && nd->flags & LOOKUP_RCU)
58 return -ECHILD; 58 return -ECHILD;
59 59
60 /* 60 /*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..6c82e5bac039 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 000000000000..bf93ad2bee07
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
1#include <linux/syscalls.h>
2#include <linux/slab.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/mount.h>
6#include <linux/namei.h>
7#include <linux/exportfs.h>
8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h>
10#include <asm/uaccess.h>
11#include "internal.h"
12
13static long do_sys_name_to_handle(struct path *path,
14 struct file_handle __user *ufh,
15 int __user *mnt_id)
16{
17 long retval;
18 struct file_handle f_handle;
19 int handle_dwords, handle_bytes;
20 struct file_handle *handle = NULL;
21
22 /*
23 * We need t make sure wether the file system
24 * support decoding of the file handle
25 */
26 if (!path->mnt->mnt_sb->s_export_op ||
27 !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
28 return -EOPNOTSUPP;
29
30 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
31 return -EFAULT;
32
33 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
34 return -EINVAL;
35
36 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
37 GFP_KERNEL);
38 if (!handle)
39 return -ENOMEM;
40
41 /* convert handle size to multiple of sizeof(u32) */
42 handle_dwords = f_handle.handle_bytes >> 2;
43
44 /* we ask for a non connected handle */
45 retval = exportfs_encode_fh(path->dentry,
46 (struct fid *)handle->f_handle,
47 &handle_dwords, 0);
48 handle->handle_type = retval;
49 /* convert handle size to bytes */
50 handle_bytes = handle_dwords * sizeof(u32);
51 handle->handle_bytes = handle_bytes;
52 if ((handle->handle_bytes > f_handle.handle_bytes) ||
53 (retval == 255) || (retval == -ENOSPC)) {
54 /* As per old exportfs_encode_fh documentation
55 * we could return ENOSPC to indicate overflow
56 * But file system returned 255 always. So handle
57 * both the values
58 */
59 /*
60 * set the handle size to zero so we copy only
61 * non variable part of the file_handle
62 */
63 handle_bytes = 0;
64 retval = -EOVERFLOW;
65 } else
66 retval = 0;
67 /* copy the mount id */
68 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
69 copy_to_user(ufh, handle,
70 sizeof(struct file_handle) + handle_bytes))
71 retval = -EFAULT;
72 kfree(handle);
73 return retval;
74}
75
76/**
77 * sys_name_to_handle_at: convert name to handle
78 * @dfd: directory relative to which name is interpreted if not absolute
79 * @name: name that should be converted to handle.
80 * @handle: resulting file handle
81 * @mnt_id: mount id of the file system containing the file
82 * @flag: flag value to indicate whether to follow symlink or not
83 *
84 * @handle->handle_size indicate the space available to store the
85 * variable part of the file handle in bytes. If there is not
86 * enough space, the field is updated to return the minimum
87 * value required.
88 */
89SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
90 struct file_handle __user *, handle, int __user *, mnt_id,
91 int, flag)
92{
93 struct path path;
94 int lookup_flags;
95 int err;
96
97 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
98 return -EINVAL;
99
100 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
101 if (flag & AT_EMPTY_PATH)
102 lookup_flags |= LOOKUP_EMPTY;
103 err = user_path_at(dfd, name, lookup_flags, &path);
104 if (!err) {
105 err = do_sys_name_to_handle(&path, handle, mnt_id);
106 path_put(&path);
107 }
108 return err;
109}
110
111static struct vfsmount *get_vfsmount_from_fd(int fd)
112{
113 struct path path;
114
115 if (fd == AT_FDCWD) {
116 struct fs_struct *fs = current->fs;
117 spin_lock(&fs->lock);
118 path = fs->pwd;
119 mntget(path.mnt);
120 spin_unlock(&fs->lock);
121 } else {
122 int fput_needed;
123 struct file *file = fget_light(fd, &fput_needed);
124 if (!file)
125 return ERR_PTR(-EBADF);
126 path = file->f_path;
127 mntget(path.mnt);
128 fput_light(file, fput_needed);
129 }
130 return path.mnt;
131}
132
133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
134{
135 return 1;
136}
137
138static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
139 struct path *path)
140{
141 int retval = 0;
142 int handle_dwords;
143
144 path->mnt = get_vfsmount_from_fd(mountdirfd);
145 if (IS_ERR(path->mnt)) {
146 retval = PTR_ERR(path->mnt);
147 goto out_err;
148 }
149 /* change the handle size to multiple of sizeof(u32) */
150 handle_dwords = handle->handle_bytes >> 2;
151 path->dentry = exportfs_decode_fh(path->mnt,
152 (struct fid *)handle->f_handle,
153 handle_dwords, handle->handle_type,
154 vfs_dentry_acceptable, NULL);
155 if (IS_ERR(path->dentry)) {
156 retval = PTR_ERR(path->dentry);
157 goto out_mnt;
158 }
159 return 0;
160out_mnt:
161 mntput(path->mnt);
162out_err:
163 return retval;
164}
165
166static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
167 struct path *path)
168{
169 int retval = 0;
170 struct file_handle f_handle;
171 struct file_handle *handle = NULL;
172
173 /*
174 * With handle we don't look at the execute bit on the
175 * the directory. Ideally we would like CAP_DAC_SEARCH.
176 * But we don't have that
177 */
178 if (!capable(CAP_DAC_READ_SEARCH)) {
179 retval = -EPERM;
180 goto out_err;
181 }
182 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
183 retval = -EFAULT;
184 goto out_err;
185 }
186 if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
187 (f_handle.handle_bytes == 0)) {
188 retval = -EINVAL;
189 goto out_err;
190 }
191 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
192 GFP_KERNEL);
193 if (!handle) {
194 retval = -ENOMEM;
195 goto out_err;
196 }
197 /* copy the full handle */
198 if (copy_from_user(handle, ufh,
199 sizeof(struct file_handle) +
200 f_handle.handle_bytes)) {
201 retval = -EFAULT;
202 goto out_handle;
203 }
204
205 retval = do_handle_to_path(mountdirfd, handle, path);
206
207out_handle:
208 kfree(handle);
209out_err:
210 return retval;
211}
212
213long do_handle_open(int mountdirfd,
214 struct file_handle __user *ufh, int open_flag)
215{
216 long retval = 0;
217 struct path path;
218 struct file *file;
219 int fd;
220
221 retval = handle_to_path(mountdirfd, ufh, &path);
222 if (retval)
223 return retval;
224
225 fd = get_unused_fd_flags(open_flag);
226 if (fd < 0) {
227 path_put(&path);
228 return fd;
229 }
230 file = file_open_root(path.dentry, path.mnt, "", open_flag);
231 if (IS_ERR(file)) {
232 put_unused_fd(fd);
233 retval = PTR_ERR(file);
234 } else {
235 retval = fd;
236 fsnotify_open(file);
237 fd_install(fd, file);
238 }
239 path_put(&path);
240 return retval;
241}
242
243/**
244 * sys_open_by_handle_at: Open the file handle
245 * @mountdirfd: directory file descriptor
246 * @handle: file handle to be opened
247 * @flag: open flags.
248 *
249 * @mountdirfd indicate the directory file descriptor
250 * of the mount point. file handle is decoded relative
251 * to the vfsmount pointed by the @mountdirfd. @flags
252 * value is same as the open(2) flags.
253 */
254SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
255 struct file_handle __user *, handle,
256 int, flags)
257{
258 long ret;
259
260 if (force_o_largefile())
261 flags |= O_LARGEFILE;
262
263 ret = do_handle_open(mountdirfd, handle, flags);
264 return ret;
265}
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..74a9544ac770 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -276,11 +276,10 @@ struct file *fget(unsigned int fd)
276 rcu_read_lock(); 276 rcu_read_lock();
277 file = fcheck_files(files, fd); 277 file = fcheck_files(files, fd);
278 if (file) { 278 if (file) {
279 if (!atomic_long_inc_not_zero(&file->f_count)) { 279 /* File object ref couldn't be taken */
280 /* File object ref couldn't be taken */ 280 if (file->f_mode & FMODE_PATH ||
281 rcu_read_unlock(); 281 !atomic_long_inc_not_zero(&file->f_count))
282 return NULL; 282 file = NULL;
283 }
284 } 283 }
285 rcu_read_unlock(); 284 rcu_read_unlock();
286 285
@@ -289,6 +288,25 @@ struct file *fget(unsigned int fd)
289 288
290EXPORT_SYMBOL(fget); 289EXPORT_SYMBOL(fget);
291 290
291struct file *fget_raw(unsigned int fd)
292{
293 struct file *file;
294 struct files_struct *files = current->files;
295
296 rcu_read_lock();
297 file = fcheck_files(files, fd);
298 if (file) {
299 /* File object ref couldn't be taken */
300 if (!atomic_long_inc_not_zero(&file->f_count))
301 file = NULL;
302 }
303 rcu_read_unlock();
304
305 return file;
306}
307
308EXPORT_SYMBOL(fget_raw);
309
292/* 310/*
293 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 311 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
294 * 312 *
@@ -313,6 +331,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
313 *fput_needed = 0; 331 *fput_needed = 0;
314 if (atomic_read(&files->count) == 1) { 332 if (atomic_read(&files->count) == 1) {
315 file = fcheck_files(files, fd); 333 file = fcheck_files(files, fd);
334 if (file && (file->f_mode & FMODE_PATH))
335 file = NULL;
336 } else {
337 rcu_read_lock();
338 file = fcheck_files(files, fd);
339 if (file) {
340 if (!(file->f_mode & FMODE_PATH) &&
341 atomic_long_inc_not_zero(&file->f_count))
342 *fput_needed = 1;
343 else
344 /* Didn't get the reference, someone's freed */
345 file = NULL;
346 }
347 rcu_read_unlock();
348 }
349
350 return file;
351}
352
353struct file *fget_raw_light(unsigned int fd, int *fput_needed)
354{
355 struct file *file;
356 struct files_struct *files = current->files;
357
358 *fput_needed = 0;
359 if (atomic_read(&files->count) == 1) {
360 file = fcheck_files(files, fd);
316 } else { 361 } else {
317 rcu_read_lock(); 362 rcu_read_lock();
318 file = fcheck_files(files, fd); 363 file = fcheck_files(files, fd);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..8bd0ef9286c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU) 161 if (nd && nd->flags & LOOKUP_RCU)
162 return -ECHILD; 162 return -ECHILD;
163 163
164 inode = entry->d_inode; 164 inode = entry->d_inode;
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1283 if (err) 1283 if (err)
1284 return err; 1284 return err;
1285 1285
1286 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1286 if (attr->ia_valid & ATTR_OPEN) {
1287 return 0; 1287 if (fc->atomic_o_trunc)
1288 return 0;
1289 file = NULL;
1290 }
1288 1291
1289 if (attr->ia_valid & ATTR_SIZE) 1292 if (attr->ia_valid & ATTR_SIZE)
1290 is_truncate = true; 1293 is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
86 return ff; 86 return ff;
87} 87}
88 88
89static void fuse_release_async(struct work_struct *work)
90{
91 struct fuse_req *req;
92 struct fuse_conn *fc;
93 struct path path;
94
95 req = container_of(work, struct fuse_req, misc.release.work);
96 path = req->misc.release.path;
97 fc = get_fuse_conn(path.dentry->d_inode);
98
99 fuse_put_request(fc, req);
100 path_put(&path);
101}
102
89static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 103static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
90{ 104{
91 path_put(&req->misc.release.path); 105 if (fc->destroy_req) {
106 /*
107 * If this is a fuseblk mount, then it's possible that
108 * releasing the path will result in releasing the
109 * super block and sending the DESTROY request. If
110 * the server is single threaded, this would hang.
111 * For this reason do the path_put() in a separate
112 * thread.
113 */
114 atomic_inc(&req->count);
115 INIT_WORK(&req->misc.release.work, fuse_release_async);
116 schedule_work(&req->misc.release.work);
117 } else {
118 path_put(&req->misc.release.path);
119 }
92} 120}
93 121
94static void fuse_file_put(struct fuse_file *ff) 122static void fuse_file_put(struct fuse_file *ff, bool sync)
95{ 123{
96 if (atomic_dec_and_test(&ff->count)) { 124 if (atomic_dec_and_test(&ff->count)) {
97 struct fuse_req *req = ff->reserved_req; 125 struct fuse_req *req = ff->reserved_req;
98 126
99 req->end = fuse_release_end; 127 if (sync) {
100 fuse_request_send_background(ff->fc, req); 128 fuse_request_send(ff->fc, req);
129 path_put(&req->misc.release.path);
130 fuse_put_request(ff->fc, req);
131 } else {
132 req->end = fuse_release_end;
133 fuse_request_send_background(ff->fc, req);
134 }
101 kfree(ff); 135 kfree(ff);
102 } 136 }
103} 137}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
219 * Normally this will send the RELEASE request, however if 253 * Normally this will send the RELEASE request, however if
220 * some asynchronous READ or WRITE requests are outstanding, 254 * some asynchronous READ or WRITE requests are outstanding,
221 * the sending will be delayed. 255 * the sending will be delayed.
256 *
257 * Make the release synchronous if this is a fuseblk mount,
258 * synchronous RELEASE is allowed (and desirable) in this case
259 * because the server can be trusted not to screw up.
222 */ 260 */
223 fuse_file_put(ff); 261 fuse_file_put(ff, ff->fc->destroy_req != NULL);
224} 262}
225 263
226static int fuse_open(struct inode *inode, struct file *file) 264static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
558 page_cache_release(page); 596 page_cache_release(page);
559 } 597 }
560 if (req->ff) 598 if (req->ff)
561 fuse_file_put(req->ff); 599 fuse_file_put(req->ff, false);
562} 600}
563 601
564static void fuse_send_readpages(struct fuse_req *req, struct file *file) 602static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1137static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) 1175static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1138{ 1176{
1139 __free_page(req->pages[0]); 1177 __free_page(req->pages[0]);
1140 fuse_file_put(req->ff); 1178 fuse_file_put(req->ff, false);
1141} 1179}
1142 1180
1143static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) 1181static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22#include <linux/rbtree.h> 22#include <linux/rbtree.h>
23#include <linux/poll.h> 23#include <linux/poll.h>
24#include <linux/workqueue.h>
24 25
25/** Max number of pages that can be used in a single read request */ 26/** Max number of pages that can be used in a single read request */
26#define FUSE_MAX_PAGES_PER_REQ 32 27#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
262 /** Data for asynchronous requests */ 263 /** Data for asynchronous requests */
263 union { 264 union {
264 struct { 265 struct {
265 struct fuse_release_in in; 266 union {
267 struct fuse_release_in in;
268 struct work_struct work;
269 };
266 struct path path; 270 struct path path;
267 } release; 271 } release;
268 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd1..051b1a084528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
637 u64 nodeid; 637 u64 nodeid;
638 u32 generation; 638 u32 generation;
639 639
640 if (*max_len < len) 640 if (*max_len < len) {
641 *max_len = len;
641 return 255; 642 return 255;
643 }
642 644
643 nodeid = get_fuse_inode(inode)->nodeid; 645 nodeid = get_fuse_inode(inode)->nodeid;
644 generation = inode->i_generation; 646 generation = inode->i_generation;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b873..0da8da2c991d 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
44 int error; 44 int error;
45 int had_lock = 0; 45 int had_lock = 0;
46 46
47 if (nd->flags & LOOKUP_RCU) 47 if (nd && nd->flags & LOOKUP_RCU)
48 return -ECHILD; 48 return -ECHILD;
49 49
50 parent = dget_parent(dentry); 50 parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f9..b5a5e60df0d5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
36 struct super_block *sb = inode->i_sb; 36 struct super_block *sb = inode->i_sb;
37 struct gfs2_inode *ip = GFS2_I(inode); 37 struct gfs2_inode *ip = GFS2_I(inode);
38 38
39 if (*len < GFS2_SMALL_FH_SIZE || 39 if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
40 (connectable && *len < GFS2_LARGE_FH_SIZE)) 40 *len = GFS2_LARGE_FH_SIZE;
41 return 255; 41 return 255;
42 } else if (*len < GFS2_SMALL_FH_SIZE) {
43 *len = GFS2_SMALL_FH_SIZE;
44 return 255;
45 }
42 46
43 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
44 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 85ba027d1c4d..72c31a315d96 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -59,14 +59,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
59 struct address_space *mapping = (struct address_space *)(gl + 1); 59 struct address_space *mapping = (struct address_space *)(gl + 1);
60 60
61 gfs2_init_glock_once(gl); 61 gfs2_init_glock_once(gl);
62 memset(mapping, 0, sizeof(*mapping)); 62 address_space_init_once(mapping);
63 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
64 spin_lock_init(&mapping->tree_lock);
65 spin_lock_init(&mapping->i_mmap_lock);
66 INIT_LIST_HEAD(&mapping->private_list);
67 spin_lock_init(&mapping->private_lock);
68 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
69 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
70} 63}
71 64
72/** 65/**
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
238} 238}
239 239
240/* 240/*
241 * hfs_unlink() 241 * hfs_remove()
242 * 242 *
243 * This is the unlink() entry in the inode_operations structure for 243 * This serves as both unlink() and rmdir() in the inode_operations
244 * regular HFS directories. The purpose is to delete an existing 244 * structure for regular HFS directories. The purpose is to delete
245 * file, given the inode for the parent directory and the name 245 * an existing child, given the inode for the parent directory and
246 * (and its length) of the existing file. 246 * the name (and its length) of the existing directory.
247 */
248static int hfs_unlink(struct inode *dir, struct dentry *dentry)
249{
250 struct inode *inode;
251 int res;
252
253 inode = dentry->d_inode;
254 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
255 if (res)
256 return res;
257
258 drop_nlink(inode);
259 hfs_delete_inode(inode);
260 inode->i_ctime = CURRENT_TIME_SEC;
261 mark_inode_dirty(inode);
262
263 return res;
264}
265
266/*
267 * hfs_rmdir()
268 * 247 *
269 * This is the rmdir() entry in the inode_operations structure for 248 * HFS does not have hardlinks, so both rmdir and unlink set the
270 * regular HFS directories. The purpose is to delete an existing 249 * link count to 0. The only difference is the emptiness check.
271 * directory, given the inode for the parent directory and the name
272 * (and its length) of the existing directory.
273 */ 250 */
274static int hfs_rmdir(struct inode *dir, struct dentry *dentry) 251static int hfs_remove(struct inode *dir, struct dentry *dentry)
275{ 252{
276 struct inode *inode; 253 struct inode *inode = dentry->d_inode;
277 int res; 254 int res;
278 255
279 inode = dentry->d_inode; 256 if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
280 if (inode->i_size != 2)
281 return -ENOTEMPTY; 257 return -ENOTEMPTY;
282 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); 258 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
283 if (res) 259 if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 283
308 /* Unlink destination if it already exists */ 284 /* Unlink destination if it already exists */
309 if (new_dentry->d_inode) { 285 if (new_dentry->d_inode) {
310 res = hfs_unlink(new_dir, new_dentry); 286 res = hfs_remove(new_dir, new_dentry);
311 if (res) 287 if (res)
312 return res; 288 return res;
313 } 289 }
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
332const struct inode_operations hfs_dir_inode_operations = { 308const struct inode_operations hfs_dir_inode_operations = {
333 .create = hfs_create, 309 .create = hfs_create,
334 .lookup = hfs_lookup, 310 .lookup = hfs_lookup,
335 .unlink = hfs_unlink, 311 .unlink = hfs_remove,
336 .mkdir = hfs_mkdir, 312 .mkdir = hfs_mkdir,
337 .rmdir = hfs_rmdir, 313 .rmdir = hfs_remove,
338 .rename = hfs_rename, 314 .rename = hfs_rename,
339 .setattr = hfs_inode_setattr, 315 .setattr = hfs_inode_setattr,
340}; 316};
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..0647d80accf6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -295,6 +295,20 @@ static void destroy_inode(struct inode *inode)
295 call_rcu(&inode->i_rcu, i_callback); 295 call_rcu(&inode->i_rcu, i_callback);
296} 296}
297 297
298void address_space_init_once(struct address_space *mapping)
299{
300 memset(mapping, 0, sizeof(*mapping));
301 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
302 spin_lock_init(&mapping->tree_lock);
303 spin_lock_init(&mapping->i_mmap_lock);
304 INIT_LIST_HEAD(&mapping->private_list);
305 spin_lock_init(&mapping->private_lock);
306 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
307 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
308 mutex_init(&mapping->unmap_mutex);
309}
310EXPORT_SYMBOL(address_space_init_once);
311
298/* 312/*
299 * These are initializations that only need to be done 313 * These are initializations that only need to be done
300 * once, because the fields are idempotent across use 314 * once, because the fields are idempotent across use
@@ -308,13 +322,7 @@ void inode_init_once(struct inode *inode)
308 INIT_LIST_HEAD(&inode->i_devices); 322 INIT_LIST_HEAD(&inode->i_devices);
309 INIT_LIST_HEAD(&inode->i_wb_list); 323 INIT_LIST_HEAD(&inode->i_wb_list);
310 INIT_LIST_HEAD(&inode->i_lru); 324 INIT_LIST_HEAD(&inode->i_lru);
311 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 325 address_space_init_once(&inode->i_data);
312 spin_lock_init(&inode->i_data.tree_lock);
313 spin_lock_init(&inode->i_data.i_mmap_lock);
314 INIT_LIST_HEAD(&inode->i_data.private_list);
315 spin_lock_init(&inode->i_data.private_lock);
316 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
317 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
318 i_size_ordered_init(inode); 326 i_size_ordered_init(inode);
319#ifdef CONFIG_FSNOTIFY 327#ifdef CONFIG_FSNOTIFY
320 INIT_HLIST_HEAD(&inode->i_fsnotify_marks); 328 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -540,11 +548,14 @@ void evict_inodes(struct super_block *sb)
540/** 548/**
541 * invalidate_inodes - attempt to free all inodes on a superblock 549 * invalidate_inodes - attempt to free all inodes on a superblock
542 * @sb: superblock to operate on 550 * @sb: superblock to operate on
551 * @kill_dirty: flag to guide handling of dirty inodes
543 * 552 *
544 * Attempts to free all inodes for a given superblock. If there were any 553 * Attempts to free all inodes for a given superblock. If there were any
545 * busy inodes return a non-zero value, else zero. 554 * busy inodes return a non-zero value, else zero.
555 * If @kill_dirty is set, discard dirty inodes too, otherwise treat
556 * them as busy.
546 */ 557 */
547int invalidate_inodes(struct super_block *sb) 558int invalidate_inodes(struct super_block *sb, bool kill_dirty)
548{ 559{
549 int busy = 0; 560 int busy = 0;
550 struct inode *inode, *next; 561 struct inode *inode, *next;
@@ -556,6 +567,10 @@ int invalidate_inodes(struct super_block *sb)
556 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 567 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
557 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 568 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
558 continue; 569 continue;
570 if (inode->i_state & I_DIRTY && !kill_dirty) {
571 busy = 1;
572 continue;
573 }
559 if (atomic_read(&inode->i_count)) { 574 if (atomic_read(&inode->i_count)) {
560 busy = 1; 575 busy = 1;
561 continue; 576 continue;
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..f3d15de44b15 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,10 +106,23 @@ extern void put_super(struct super_block *sb);
106struct nameidata; 106struct nameidata;
107extern struct file *nameidata_to_filp(struct nameidata *); 107extern struct file *nameidata_to_filp(struct nameidata *);
108extern void release_open_intent(struct nameidata *); 108extern void release_open_intent(struct nameidata *);
109struct open_flags {
110 int open_flag;
111 int mode;
112 int acc_mode;
113 int intent;
114};
115extern struct file *do_filp_open(int dfd, const char *pathname,
116 const struct open_flags *op, int lookup_flags);
117extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
118 const char *, const struct open_flags *, int lookup_flags);
119
120extern long do_handle_open(int mountdirfd,
121 struct file_handle __user *ufh, int open_flag);
109 122
110/* 123/*
111 * inode.c 124 * inode.c
112 */ 125 */
113extern int get_nr_dirty_inodes(void); 126extern int get_nr_dirty_inodes(void);
114extern void evict_inodes(struct super_block *); 127extern void evict_inodes(struct super_block *);
115extern int invalidate_inodes(struct super_block *); 128extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb38474..dd4687ff30d0 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
124 * offset of the inode and the upper 16 bits of fh32[1] to 124 * offset of the inode and the upper 16 bits of fh32[1] to
125 * hold the offset of the parent. 125 * hold the offset of the parent.
126 */ 126 */
127 127 if (connectable && (len < 5)) {
128 if (len < 3 || (connectable && len < 5)) 128 *max_len = 5;
129 return 255;
130 } else if (len < 3) {
131 *max_len = 3;
129 return 255; 132 return 255;
133 }
130 134
131 len = 3; 135 len = 3;
132 fh32[0] = ei->i_iget5_block; 136 fh32[0] = ei->i_iget5_block;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb6..3f04a1804931 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
809 if (ip->i_nlink == JFS_LINK_MAX) 809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK; 810 return -EMLINK;
811 811
812 if (ip->i_nlink == 0)
813 return -ENOENT;
814
815 dquot_initialize(dir); 812 dquot_initialize(dir);
816 813
817 tid = txBegin(ip->i_sb, 0); 814 tid = txBegin(ip->i_sb, 0);
@@ -1600,7 +1597,7 @@ out:
1600 1597
1601static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1598static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1602{ 1599{
1603 if (nd->flags & LOOKUP_RCU) 1600 if (nd && nd->flags & LOOKUP_RCU)
1604 return -ECHILD; 1601 return -ECHILD;
1605 /* 1602 /*
1606 * This is not negative dentry. Always valid. 1603 * This is not negative dentry. Always valid.
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
213 new_de = minix_find_entry(new_dentry, &new_page); 213 new_de = minix_find_entry(new_dentry, &new_page);
214 if (!new_de) 214 if (!new_de)
215 goto out_dir; 215 goto out_dir;
216 inode_inc_link_count(old_inode);
217 minix_set_link(new_de, new_page, old_inode); 216 minix_set_link(new_de, new_page, old_inode);
218 new_inode->i_ctime = CURRENT_TIME_SEC; 217 new_inode->i_ctime = CURRENT_TIME_SEC;
219 if (dir_de) 218 if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
225 if (new_dir->i_nlink >= info->s_link_max) 224 if (new_dir->i_nlink >= info->s_link_max)
226 goto out_dir; 225 goto out_dir;
227 } 226 }
228 inode_inc_link_count(old_inode);
229 err = minix_add_link(new_dentry, old_inode); 227 err = minix_add_link(new_dentry, old_inode);
230 if (err) { 228 if (err)
231 inode_dec_link_count(old_inode);
232 goto out_dir; 229 goto out_dir;
233 }
234 if (dir_de) 230 if (dir_de)
235 inode_inc_link_count(new_dir); 231 inode_inc_link_count(new_dir);
236 } 232 }
237 233
238 minix_delete_entry(old_de, old_page); 234 minix_delete_entry(old_de, old_page);
239 inode_dec_link_count(old_inode); 235 mark_inode_dirty(old_inode);
240 236
241 if (dir_de) { 237 if (dir_de) {
242 minix_set_link(dir_de, dir_page, new_dir); 238 minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 0087cf9c2c6b..0a601cae23de 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 408{
402 struct fs_struct *fs = current->fs; 409 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 410 struct dentry *dentry = nd->path.dentry;
411 int want_root = 0;
404 412
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 413 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 414 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
415 want_root = 1;
407 spin_lock(&fs->lock); 416 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 417 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 418 nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 423 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 424 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 425 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 426 if (want_root) {
418 path_get(&nd->root); 427 path_get(&nd->root);
419 spin_unlock(&fs->lock); 428 spin_unlock(&fs->lock);
420 } 429 }
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 436err:
428 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
429err_root: 438err_root:
430 if (nd->root.mnt) 439 if (want_root)
431 spin_unlock(&fs->lock); 440 spin_unlock(&fs->lock);
432 return -ECHILD; 441 return -ECHILD;
433} 442}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 463{
455 struct fs_struct *fs = current->fs; 464 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 465 struct dentry *parent = nd->path.dentry;
466 int want_root = 0;
457 467
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 468 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 469 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
470 want_root = 1;
460 spin_lock(&fs->lock); 471 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 472 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 473 nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 487 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 488 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 489 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 490 if (want_root) {
480 path_get(&nd->root); 491 path_get(&nd->root);
481 spin_unlock(&fs->lock); 492 spin_unlock(&fs->lock);
482 } 493 }
@@ -490,7 +501,7 @@ err:
490 spin_unlock(&dentry->d_lock); 501 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 502 spin_unlock(&parent->d_lock);
492err_root: 503err_root:
493 if (nd->root.mnt) 504 if (want_root)
494 spin_unlock(&fs->lock); 505 spin_unlock(&fs->lock);
495 return -ECHILD; 506 return -ECHILD;
496} 507}
@@ -498,8 +509,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 509/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 510static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 511{
501 if (nd->flags & LOOKUP_RCU) 512 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 513 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
514 nd->flags &= ~LOOKUP_RCU;
515 if (!(nd->flags & LOOKUP_ROOT))
516 nd->root.mnt = NULL;
517 rcu_read_unlock();
518 br_read_unlock(vfsmount_lock);
519 return -ECHILD;
520 }
521 }
503 return 0; 522 return 0;
504} 523}
505 524
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 537
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 538 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 539 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 540 if (!(nd->flags & LOOKUP_ROOT))
541 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 542 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 543 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 544 goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
539 return -ECHILD; 559 return -ECHILD;
540} 560}
541 561
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 562/**
551 * release_open_intent - free up open intent resources 563 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 564 * @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 602 return dentry;
591} 603}
592 604
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 605/*
628 * force_reval_path - force revalidation of a dentry 606 * handle_reval_path - force revalidation of a dentry
629 * 607 *
630 * In some situations the path walking code will trust dentries without 608 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 609 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 617 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 618 * to the path if necessary.
641 */ 619 */
642static int 620static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 621{
622 struct dentry *dentry = nd->path.dentry;
645 int status; 623 int status;
646 struct dentry *dentry = path->dentry;
647 624
648 /* 625 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to 626 return 0;
650 * become stale. 627
651 */ 628 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
652 if (!need_reval_dot(dentry)) 629 return 0;
630
631 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
653 return 0; 632 return 0;
654 633
634 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 635 status = d_revalidate(dentry, nd);
656 if (status > 0) 636 if (status > 0)
657 return 0; 637 return 0;
658 638
659 if (!status) { 639 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 640 status = -ESTALE;
662 } 641
663 return status; 642 return status;
664} 643}
665 644
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 707 path_put(&nd->path);
729 nd->path = nd->root; 708 nd->path = nd->root;
730 path_get(&nd->root); 709 path_get(&nd->root);
710 nd->flags |= LOOKUP_JUMPED;
731 } 711 }
732 nd->inode = nd->path.dentry->d_inode; 712 nd->inode = nd->path.dentry->d_inode;
733 713
@@ -757,20 +737,44 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 737 nd->path.dentry = path->dentry;
758} 738}
759 739
740static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
741{
742 struct inode *inode = link->dentry->d_inode;
743 if (!IS_ERR(cookie) && inode->i_op->put_link)
744 inode->i_op->put_link(link->dentry, nd, cookie);
745 path_put(link);
746}
747
760static __always_inline int 748static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 749follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 750{
763 int error; 751 int error;
764 struct dentry *dentry = link->dentry; 752 struct dentry *dentry = link->dentry;
765 753
766 BUG_ON(nd->flags & LOOKUP_RCU); 754 BUG_ON(nd->flags & LOOKUP_RCU);
767 755
756 if (unlikely(current->total_link_count >= 40)) {
757 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
758 path_put_conditional(link, nd);
759 path_put(&nd->path);
760 return -ELOOP;
761 }
762 cond_resched();
763 current->total_link_count++;
764
768 touch_atime(link->mnt, dentry); 765 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 766 nd_set_link(nd, NULL);
770 767
771 if (link->mnt == nd->path.mnt) 768 if (link->mnt == nd->path.mnt)
772 mntget(link->mnt); 769 mntget(link->mnt);
773 770
771 error = security_inode_follow_link(link->dentry, nd);
772 if (error) {
773 *p = ERR_PTR(error); /* no ->put_link(), please */
774 path_put(&nd->path);
775 return error;
776 }
777
774 nd->last_type = LAST_BIND; 778 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 779 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
776 error = PTR_ERR(*p); 780 error = PTR_ERR(*p);
@@ -780,56 +784,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 784 if (s)
781 error = __vfs_follow_link(nd, s); 785 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 786 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 787 nd->flags |= LOOKUP_JUMPED;
784 if (error) 788 nd->inode = nd->path.dentry->d_inode;
789 if (nd->inode->i_op->follow_link) {
790 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 791 path_put(&nd->path);
792 error = -ELOOP;
793 }
786 } 794 }
787 } 795 }
788 return error; 796 return error;
789} 797}
790 798
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
808 if (current->link_count >= MAX_NESTED_LINKS)
809 goto loop;
810 if (current->total_link_count >= 40)
811 goto loop;
812 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
813 cond_resched();
814 err = security_inode_follow_link(path->dentry, nd);
815 if (err)
816 goto loop;
817 current->link_count++;
818 current->total_link_count++;
819 nd->depth++;
820 err = __do_follow_link(path, nd, &cookie);
821 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
822 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
823 path_put(path);
824 current->link_count--;
825 nd->depth--;
826 return err;
827loop:
828 path_put_conditional(path, nd);
829 path_put(&nd->path);
830 return err;
831}
832
833static int follow_up_rcu(struct path *path) 799static int follow_up_rcu(struct path *path)
834{ 800{
835 struct vfsmount *parent; 801 struct vfsmount *parent;
@@ -1068,7 +1034,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1068 1034
1069 seq = read_seqcount_begin(&parent->d_seq); 1035 seq = read_seqcount_begin(&parent->d_seq);
1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1036 if (read_seqcount_retry(&old->d_seq, nd->seq))
1071 return -ECHILD; 1037 goto failed;
1072 inode = parent->d_inode; 1038 inode = parent->d_inode;
1073 nd->path.dentry = parent; 1039 nd->path.dentry = parent;
1074 nd->seq = seq; 1040 nd->seq = seq;
@@ -1081,8 +1047,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1081 } 1047 }
1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1048 __follow_mount_rcu(nd, &nd->path, &inode, true);
1083 nd->inode = inode; 1049 nd->inode = inode;
1084
1085 return 0; 1050 return 0;
1051
1052failed:
1053 nd->flags &= ~LOOKUP_RCU;
1054 if (!(nd->flags & LOOKUP_ROOT))
1055 nd->root.mnt = NULL;
1056 rcu_read_unlock();
1057 br_read_unlock(vfsmount_lock);
1058 return -ECHILD;
1086} 1059}
1087 1060
1088/* 1061/*
@@ -1216,68 +1189,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1216{ 1189{
1217 struct vfsmount *mnt = nd->path.mnt; 1190 struct vfsmount *mnt = nd->path.mnt;
1218 struct dentry *dentry, *parent = nd->path.dentry; 1191 struct dentry *dentry, *parent = nd->path.dentry;
1219 struct inode *dir; 1192 int need_reval = 1;
1193 int status = 1;
1220 int err; 1194 int err;
1221 1195
1222 /* 1196 /*
1223 * See if the low-level filesystem might want
1224 * to use its own hash..
1225 */
1226 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1227 err = parent->d_op->d_hash(parent, nd->inode, name);
1228 if (err < 0)
1229 return err;
1230 }
1231
1232 /*
1233 * Rename seqlock is not required here because in the off chance 1197 * Rename seqlock is not required here because in the off chance
1234 * of a false negative due to a concurrent rename, we're going to 1198 * of a false negative due to a concurrent rename, we're going to
1235 * do the non-racy lookup, below. 1199 * do the non-racy lookup, below.
1236 */ 1200 */
1237 if (nd->flags & LOOKUP_RCU) { 1201 if (nd->flags & LOOKUP_RCU) {
1238 unsigned seq; 1202 unsigned seq;
1239
1240 *inode = nd->inode; 1203 *inode = nd->inode;
1241 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1204 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1242 if (!dentry) { 1205 if (!dentry)
1243 if (nameidata_drop_rcu(nd)) 1206 goto unlazy;
1244 return -ECHILD; 1207
1245 goto need_lookup;
1246 }
1247 /* Memory barrier in read_seqcount_begin of child is enough */ 1208 /* Memory barrier in read_seqcount_begin of child is enough */
1248 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1209 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1249 return -ECHILD; 1210 return -ECHILD;
1250
1251 nd->seq = seq; 1211 nd->seq = seq;
1212
1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1213 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1253 dentry = do_revalidate_rcu(dentry, nd); 1214 status = d_revalidate(dentry, nd);
1254 if (!dentry) 1215 if (unlikely(status <= 0)) {
1255 goto need_lookup; 1216 if (status != -ECHILD)
1256 if (IS_ERR(dentry)) 1217 need_reval = 0;
1257 goto fail; 1218 goto unlazy;
1258 if (!(nd->flags & LOOKUP_RCU)) 1219 }
1259 goto done;
1260 } 1220 }
1261 path->mnt = mnt; 1221 path->mnt = mnt;
1262 path->dentry = dentry; 1222 path->dentry = dentry;
1263 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1223 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1264 return 0; 1224 return 0;
1265 if (nameidata_drop_rcu(nd)) 1225unlazy:
1266 return -ECHILD; 1226 if (dentry) {
1267 /* fallthru */ 1227 if (nameidata_dentry_drop_rcu(nd, dentry))
1228 return -ECHILD;
1229 } else {
1230 if (nameidata_drop_rcu(nd))
1231 return -ECHILD;
1232 }
1233 } else {
1234 dentry = __d_lookup(parent, name);
1268 } 1235 }
1269 dentry = __d_lookup(parent, name); 1236
1270 if (!dentry) 1237retry:
1271 goto need_lookup; 1238 if (unlikely(!dentry)) {
1272found: 1239 struct inode *dir = parent->d_inode;
1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1240 BUG_ON(nd->inode != dir);
1274 dentry = do_revalidate(dentry, nd); 1241
1275 if (!dentry) 1242 mutex_lock(&dir->i_mutex);
1276 goto need_lookup; 1243 dentry = d_lookup(parent, name);
1277 if (IS_ERR(dentry)) 1244 if (likely(!dentry)) {
1278 goto fail; 1245 dentry = d_alloc_and_lookup(parent, name, nd);
1246 if (IS_ERR(dentry)) {
1247 mutex_unlock(&dir->i_mutex);
1248 return PTR_ERR(dentry);
1249 }
1250 /* known good */
1251 need_reval = 0;
1252 status = 1;
1253 }
1254 mutex_unlock(&dir->i_mutex);
1279 } 1255 }
1280done: 1256 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1257 status = d_revalidate(dentry, nd);
1258 if (unlikely(status <= 0)) {
1259 if (status < 0) {
1260 dput(dentry);
1261 return status;
1262 }
1263 if (!d_invalidate(dentry)) {
1264 dput(dentry);
1265 dentry = NULL;
1266 need_reval = 1;
1267 goto retry;
1268 }
1269 }
1270
1281 path->mnt = mnt; 1271 path->mnt = mnt;
1282 path->dentry = dentry; 1272 path->dentry = dentry;
1283 err = follow_managed(path, nd->flags); 1273 err = follow_managed(path, nd->flags);
@@ -1287,39 +1277,113 @@ done:
1287 } 1277 }
1288 *inode = path->dentry->d_inode; 1278 *inode = path->dentry->d_inode;
1289 return 0; 1279 return 0;
1280}
1290 1281
1291need_lookup: 1282static inline int may_lookup(struct nameidata *nd)
1292 dir = parent->d_inode; 1283{
1293 BUG_ON(nd->inode != dir); 1284 if (nd->flags & LOOKUP_RCU) {
1285 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1286 if (err != -ECHILD)
1287 return err;
1288 if (nameidata_drop_rcu(nd))
1289 return -ECHILD;
1290 }
1291 return exec_permission(nd->inode, 0);
1292}
1294 1293
1295 mutex_lock(&dir->i_mutex); 1294static inline int handle_dots(struct nameidata *nd, int type)
1296 /* 1295{
1297 * First re-do the cached lookup just in case it was created 1296 if (type == LAST_DOTDOT) {
1298 * while we waited for the directory semaphore, or the first 1297 if (nd->flags & LOOKUP_RCU) {
1299 * lookup failed due to an unrelated rename. 1298 if (follow_dotdot_rcu(nd))
1300 * 1299 return -ECHILD;
1301 * This could use version numbering or similar to avoid unnecessary 1300 } else
1302 * cache lookups, but then we'd have to do the first lookup in the 1301 follow_dotdot(nd);
1303 * non-racy way. However in the common case here, everything should 1302 }
1304 * be hot in cache, so would it be a big win? 1303 return 0;
1305 */ 1304}
1306 dentry = d_lookup(parent, name); 1305
1307 if (likely(!dentry)) { 1306static void terminate_walk(struct nameidata *nd)
1308 dentry = d_alloc_and_lookup(parent, name, nd); 1307{
1309 mutex_unlock(&dir->i_mutex); 1308 if (!(nd->flags & LOOKUP_RCU)) {
1310 if (IS_ERR(dentry)) 1309 path_put(&nd->path);
1311 goto fail; 1310 } else {
1312 goto done; 1311 nd->flags &= ~LOOKUP_RCU;
1312 if (!(nd->flags & LOOKUP_ROOT))
1313 nd->root.mnt = NULL;
1314 rcu_read_unlock();
1315 br_read_unlock(vfsmount_lock);
1313 } 1316 }
1317}
1318
1319static inline int walk_component(struct nameidata *nd, struct path *path,
1320 struct qstr *name, int type, int follow)
1321{
1322 struct inode *inode;
1323 int err;
1314 /* 1324 /*
1315 * Uhhuh! Nasty case: the cache was re-populated while 1325 * "." and ".." are special - ".." especially so because it has
1316 * we waited on the semaphore. Need to revalidate. 1326 * to be able to know about the current root directory and
1327 * parent relationships.
1317 */ 1328 */
1318 mutex_unlock(&dir->i_mutex); 1329 if (unlikely(type != LAST_NORM))
1319 goto found; 1330 return handle_dots(nd, type);
1331 err = do_lookup(nd, name, path, &inode);
1332 if (unlikely(err)) {
1333 terminate_walk(nd);
1334 return err;
1335 }
1336 if (!inode) {
1337 path_to_nameidata(path, nd);
1338 terminate_walk(nd);
1339 return -ENOENT;
1340 }
1341 if (unlikely(inode->i_op->follow_link) && follow) {
1342 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1343 return -ECHILD;
1344 BUG_ON(inode != path->dentry->d_inode);
1345 return 1;
1346 }
1347 path_to_nameidata(path, nd);
1348 nd->inode = inode;
1349 return 0;
1350}
1320 1351
1321fail: 1352/*
1322 return PTR_ERR(dentry); 1353 * This limits recursive symlink follows to 8, while
1354 * limiting consecutive symlinks to 40.
1355 *
1356 * Without that kind of total limit, nasty chains of consecutive
1357 * symlinks can cause almost arbitrarily long lookups.
1358 */
1359static inline int nested_symlink(struct path *path, struct nameidata *nd)
1360{
1361 int res;
1362
1363 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1364 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1365 path_put_conditional(path, nd);
1366 path_put(&nd->path);
1367 return -ELOOP;
1368 }
1369
1370 nd->depth++;
1371 current->link_count++;
1372
1373 do {
1374 struct path link = *path;
1375 void *cookie;
1376
1377 res = follow_link(&link, nd, &cookie);
1378 if (!res)
1379 res = walk_component(nd, path, &nd->last,
1380 nd->last_type, LOOKUP_FOLLOW);
1381 put_link(nd, &link, cookie);
1382 } while (res > 0);
1383
1384 current->link_count--;
1385 nd->depth--;
1386 return res;
1323} 1387}
1324 1388
1325/* 1389/*
@@ -1339,30 +1403,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1339 while (*name=='/') 1403 while (*name=='/')
1340 name++; 1404 name++;
1341 if (!*name) 1405 if (!*name)
1342 goto return_reval; 1406 return 0;
1343
1344 if (nd->depth)
1345 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1346 1407
1347 /* At this point we know we have a real path component. */ 1408 /* At this point we know we have a real path component. */
1348 for(;;) { 1409 for(;;) {
1349 struct inode *inode;
1350 unsigned long hash; 1410 unsigned long hash;
1351 struct qstr this; 1411 struct qstr this;
1352 unsigned int c; 1412 unsigned int c;
1413 int type;
1353 1414
1354 nd->flags |= LOOKUP_CONTINUE; 1415 nd->flags |= LOOKUP_CONTINUE;
1355 if (nd->flags & LOOKUP_RCU) { 1416
1356 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1417 err = may_lookup(nd);
1357 if (err == -ECHILD) {
1358 if (nameidata_drop_rcu(nd))
1359 return -ECHILD;
1360 goto exec_again;
1361 }
1362 } else {
1363exec_again:
1364 err = exec_permission(nd->inode, 0);
1365 }
1366 if (err) 1418 if (err)
1367 break; 1419 break;
1368 1420
@@ -1378,52 +1430,43 @@ exec_again:
1378 this.len = name - (const char *) this.name; 1430 this.len = name - (const char *) this.name;
1379 this.hash = end_name_hash(hash); 1431 this.hash = end_name_hash(hash);
1380 1432
1433 type = LAST_NORM;
1434 if (this.name[0] == '.') switch (this.len) {
1435 case 2:
1436 if (this.name[1] == '.') {
1437 type = LAST_DOTDOT;
1438 nd->flags |= LOOKUP_JUMPED;
1439 }
1440 break;
1441 case 1:
1442 type = LAST_DOT;
1443 }
1444 if (likely(type == LAST_NORM)) {
1445 struct dentry *parent = nd->path.dentry;
1446 nd->flags &= ~LOOKUP_JUMPED;
1447 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1448 err = parent->d_op->d_hash(parent, nd->inode,
1449 &this);
1450 if (err < 0)
1451 break;
1452 }
1453 }
1454
1381 /* remove trailing slashes? */ 1455 /* remove trailing slashes? */
1382 if (!c) 1456 if (!c)
1383 goto last_component; 1457 goto last_component;
1384 while (*++name == '/'); 1458 while (*++name == '/');
1385 if (!*name) 1459 if (!*name)
1386 goto last_with_slashes; 1460 goto last_component;
1387 1461
1388 /* 1462 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1389 * "." and ".." are special - ".." especially so because it has 1463 if (err < 0)
1390 * to be able to know about the current root directory and 1464 return err;
1391 * parent relationships.
1392 */
1393 if (this.name[0] == '.') switch (this.len) {
1394 default:
1395 break;
1396 case 2:
1397 if (this.name[1] != '.')
1398 break;
1399 if (nd->flags & LOOKUP_RCU) {
1400 if (follow_dotdot_rcu(nd))
1401 return -ECHILD;
1402 } else
1403 follow_dotdot(nd);
1404 /* fallthrough */
1405 case 1:
1406 continue;
1407 }
1408 /* This does the actual lookups.. */
1409 err = do_lookup(nd, &this, &next, &inode);
1410 if (err)
1411 break;
1412 err = -ENOENT;
1413 if (!inode)
1414 goto out_dput;
1415 1465
1416 if (inode->i_op->follow_link) { 1466 if (err) {
1417 err = do_follow_link(inode, &next, nd); 1467 err = nested_symlink(&next, nd);
1418 if (err) 1468 if (err)
1419 goto return_err; 1469 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1470 }
1428 err = -ENOTDIR; 1471 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1472 if (!nd->inode->i_op->lookup)
@@ -1431,209 +1474,109 @@ exec_again:
1431 continue; 1474 continue;
1432 /* here ends the main loop */ 1475 /* here ends the main loop */
1433 1476
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1477last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1478 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1479 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 err = do_follow_link(inode, &next, nd);
1462 if (err)
1463 goto return_err;
1464 nd->inode = nd->path.dentry->d_inode;
1465 } else {
1466 path_to_nameidata(&next, nd);
1467 nd->inode = inode;
1468 }
1469 err = -ENOENT;
1470 if (!nd->inode)
1471 break;
1472 if (lookup_flags & LOOKUP_DIRECTORY) {
1473 err = -ENOTDIR;
1474 if (!nd->inode->i_op->lookup)
1475 break;
1476 }
1477 goto return_base;
1478lookup_parent:
1479 nd->last = this; 1480 nd->last = this;
1480 nd->last_type = LAST_NORM; 1481 nd->last_type = type;
1481 if (this.name[0] != '.')
1482 goto return_base;
1483 if (this.len == 1)
1484 nd->last_type = LAST_DOT;
1485 else if (this.len == 2 && this.name[1] == '.')
1486 nd->last_type = LAST_DOTDOT;
1487 else
1488 goto return_base;
1489return_reval:
1490 /*
1491 * We bypassed the ordinary revalidation routines.
1492 * We may need to check the cached dentry for staleness.
1493 */
1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1497 /* Note: we do not d_invalidate() */
1498 err = d_revalidate(nd->path.dentry, nd);
1499 if (!err)
1500 err = -ESTALE;
1501 if (err < 0)
1502 break;
1503 return 0;
1504 }
1505return_base:
1506 if (nameidata_drop_rcu_last_maybe(nd))
1507 return -ECHILD;
1508 return 0; 1482 return 0;
1509out_dput:
1510 if (!(nd->flags & LOOKUP_RCU))
1511 path_put_conditional(&next, nd);
1512 break;
1513 } 1483 }
1514 if (!(nd->flags & LOOKUP_RCU)) 1484 terminate_walk(nd);
1515 path_put(&nd->path);
1516return_err:
1517 return err; 1485 return err;
1518} 1486}
1519 1487
1520static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1488static int path_init(int dfd, const char *name, unsigned int flags,
1521{ 1489 struct nameidata *nd, struct file **fp)
1522 current->total_link_count = 0;
1523
1524 return link_path_walk(name, nd);
1525}
1526
1527static inline int path_walk_simple(const char *name, struct nameidata *nd)
1528{
1529 current->total_link_count = 0;
1530
1531 return link_path_walk(name, nd);
1532}
1533
1534static int path_walk(const char *name, struct nameidata *nd)
1535{
1536 struct path save = nd->path;
1537 int result;
1538
1539 current->total_link_count = 0;
1540
1541 /* make sure the stuff we saved doesn't go away */
1542 path_get(&save);
1543
1544 result = link_path_walk(name, nd);
1545 if (result == -ESTALE) {
1546 /* nd->path had been dropped */
1547 current->total_link_count = 0;
1548 nd->path = save;
1549 path_get(&nd->path);
1550 nd->flags |= LOOKUP_REVAL;
1551 result = link_path_walk(name, nd);
1552 }
1553
1554 path_put(&save);
1555
1556 return result;
1557}
1558
1559static void path_finish_rcu(struct nameidata *nd)
1560{
1561 if (nd->flags & LOOKUP_RCU) {
1562 /* RCU dangling. Cancel it. */
1563 nd->flags &= ~LOOKUP_RCU;
1564 nd->root.mnt = NULL;
1565 rcu_read_unlock();
1566 br_read_unlock(vfsmount_lock);
1567 }
1568 if (nd->file)
1569 fput(nd->file);
1570}
1571
1572static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1573{ 1490{
1574 int retval = 0; 1491 int retval = 0;
1575 int fput_needed; 1492 int fput_needed;
1576 struct file *file; 1493 struct file *file;
1577 1494
1578 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1495 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1579 nd->flags = flags | LOOKUP_RCU; 1496 nd->flags = flags | LOOKUP_JUMPED;
1580 nd->depth = 0; 1497 nd->depth = 0;
1498 if (flags & LOOKUP_ROOT) {
1499 struct inode *inode = nd->root.dentry->d_inode;
1500 if (*name) {
1501 if (!inode->i_op->lookup)
1502 return -ENOTDIR;
1503 retval = inode_permission(inode, MAY_EXEC);
1504 if (retval)
1505 return retval;
1506 }
1507 nd->path = nd->root;
1508 nd->inode = inode;
1509 if (flags & LOOKUP_RCU) {
1510 br_read_lock(vfsmount_lock);
1511 rcu_read_lock();
1512 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1513 } else {
1514 path_get(&nd->path);
1515 }
1516 return 0;
1517 }
1518
1581 nd->root.mnt = NULL; 1519 nd->root.mnt = NULL;
1582 nd->file = NULL;
1583 1520
1584 if (*name=='/') { 1521 if (*name=='/') {
1585 struct fs_struct *fs = current->fs; 1522 if (flags & LOOKUP_RCU) {
1586 unsigned seq; 1523 br_read_lock(vfsmount_lock);
1587 1524 rcu_read_lock();
1588 br_read_lock(vfsmount_lock); 1525 set_root_rcu(nd);
1589 rcu_read_lock(); 1526 } else {
1590 1527 set_root(nd);
1591 do { 1528 path_get(&nd->root);
1592 seq = read_seqcount_begin(&fs->seq); 1529 }
1593 nd->root = fs->root; 1530 nd->path = nd->root;
1594 nd->path = nd->root;
1595 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1596 } while (read_seqcount_retry(&fs->seq, seq));
1597
1598 } else if (dfd == AT_FDCWD) { 1531 } else if (dfd == AT_FDCWD) {
1599 struct fs_struct *fs = current->fs; 1532 if (flags & LOOKUP_RCU) {
1600 unsigned seq; 1533 struct fs_struct *fs = current->fs;
1601 1534 unsigned seq;
1602 br_read_lock(vfsmount_lock);
1603 rcu_read_lock();
1604 1535
1605 do { 1536 br_read_lock(vfsmount_lock);
1606 seq = read_seqcount_begin(&fs->seq); 1537 rcu_read_lock();
1607 nd->path = fs->pwd;
1608 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1609 } while (read_seqcount_retry(&fs->seq, seq));
1610 1538
1539 do {
1540 seq = read_seqcount_begin(&fs->seq);
1541 nd->path = fs->pwd;
1542 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1543 } while (read_seqcount_retry(&fs->seq, seq));
1544 } else {
1545 get_fs_pwd(current->fs, &nd->path);
1546 }
1611 } else { 1547 } else {
1612 struct dentry *dentry; 1548 struct dentry *dentry;
1613 1549
1614 file = fget_light(dfd, &fput_needed); 1550 file = fget_raw_light(dfd, &fput_needed);
1615 retval = -EBADF; 1551 retval = -EBADF;
1616 if (!file) 1552 if (!file)
1617 goto out_fail; 1553 goto out_fail;
1618 1554
1619 dentry = file->f_path.dentry; 1555 dentry = file->f_path.dentry;
1620 1556
1621 retval = -ENOTDIR; 1557 if (*name) {
1622 if (!S_ISDIR(dentry->d_inode->i_mode)) 1558 retval = -ENOTDIR;
1623 goto fput_fail; 1559 if (!S_ISDIR(dentry->d_inode->i_mode))
1560 goto fput_fail;
1624 1561
1625 retval = file_permission(file, MAY_EXEC); 1562 retval = file_permission(file, MAY_EXEC);
1626 if (retval) 1563 if (retval)
1627 goto fput_fail; 1564 goto fput_fail;
1565 }
1628 1566
1629 nd->path = file->f_path; 1567 nd->path = file->f_path;
1630 if (fput_needed) 1568 if (flags & LOOKUP_RCU) {
1631 nd->file = file; 1569 if (fput_needed)
1632 1570 *fp = file;
1633 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1571 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1634 br_read_lock(vfsmount_lock); 1572 br_read_lock(vfsmount_lock);
1635 rcu_read_lock(); 1573 rcu_read_lock();
1574 } else {
1575 path_get(&file->f_path);
1576 fput_light(file, fput_needed);
1577 }
1636 } 1578 }
1579
1637 nd->inode = nd->path.dentry->d_inode; 1580 nd->inode = nd->path.dentry->d_inode;
1638 return 0; 1581 return 0;
1639 1582
@@ -1643,60 +1586,23 @@ out_fail:
1643 return retval; 1586 return retval;
1644} 1587}
1645 1588
1646static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1589static inline int lookup_last(struct nameidata *nd, struct path *path)
1647{ 1590{
1648 int retval = 0; 1591 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1649 int fput_needed; 1592 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1650 struct file *file;
1651 1593
1652 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1594 nd->flags &= ~LOOKUP_PARENT;
1653 nd->flags = flags; 1595 return walk_component(nd, path, &nd->last, nd->last_type,
1654 nd->depth = 0; 1596 nd->flags & LOOKUP_FOLLOW);
1655 nd->root.mnt = NULL;
1656
1657 if (*name=='/') {
1658 set_root(nd);
1659 nd->path = nd->root;
1660 path_get(&nd->root);
1661 } else if (dfd == AT_FDCWD) {
1662 get_fs_pwd(current->fs, &nd->path);
1663 } else {
1664 struct dentry *dentry;
1665
1666 file = fget_light(dfd, &fput_needed);
1667 retval = -EBADF;
1668 if (!file)
1669 goto out_fail;
1670
1671 dentry = file->f_path.dentry;
1672
1673 retval = -ENOTDIR;
1674 if (!S_ISDIR(dentry->d_inode->i_mode))
1675 goto fput_fail;
1676
1677 retval = file_permission(file, MAY_EXEC);
1678 if (retval)
1679 goto fput_fail;
1680
1681 nd->path = file->f_path;
1682 path_get(&file->f_path);
1683
1684 fput_light(file, fput_needed);
1685 }
1686 nd->inode = nd->path.dentry->d_inode;
1687 return 0;
1688
1689fput_fail:
1690 fput_light(file, fput_needed);
1691out_fail:
1692 return retval;
1693} 1597}
1694 1598
1695/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1599/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1696static int do_path_lookup(int dfd, const char *name, 1600static int path_lookupat(int dfd, const char *name,
1697 unsigned int flags, struct nameidata *nd) 1601 unsigned int flags, struct nameidata *nd)
1698{ 1602{
1699 int retval; 1603 struct file *base = NULL;
1604 struct path path;
1605 int err;
1700 1606
1701 /* 1607 /*
1702 * Path walking is largely split up into 2 different synchronisation 1608 * Path walking is largely split up into 2 different synchronisation
@@ -1712,44 +1618,75 @@ static int do_path_lookup(int dfd, const char *name,
1712 * be handled by restarting a traditional ref-walk (which will always 1618 * be handled by restarting a traditional ref-walk (which will always
1713 * be able to complete). 1619 * be able to complete).
1714 */ 1620 */
1715 retval = path_init_rcu(dfd, name, flags, nd); 1621 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1716 if (unlikely(retval)) 1622
1717 return retval; 1623 if (unlikely(err))
1718 retval = path_walk_rcu(name, nd); 1624 return err;
1719 path_finish_rcu(nd); 1625
1720 if (nd->root.mnt) { 1626 current->total_link_count = 0;
1721 path_put(&nd->root); 1627 err = link_path_walk(name, nd);
1722 nd->root.mnt = NULL; 1628
1629 if (!err && !(flags & LOOKUP_PARENT)) {
1630 err = lookup_last(nd, &path);
1631 while (err > 0) {
1632 void *cookie;
1633 struct path link = path;
1634 nd->flags |= LOOKUP_PARENT;
1635 err = follow_link(&link, nd, &cookie);
1636 if (!err)
1637 err = lookup_last(nd, &path);
1638 put_link(nd, &link, cookie);
1639 }
1723 } 1640 }
1724 1641
1725 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1642 if (nd->flags & LOOKUP_RCU) {
1726 /* slower, locked walk */ 1643 /* went all way through without dropping RCU */
1727 if (retval == -ESTALE) 1644 BUG_ON(err);
1728 flags |= LOOKUP_REVAL; 1645 if (nameidata_drop_rcu_last(nd))
1729 retval = path_init(dfd, name, flags, nd); 1646 err = -ECHILD;
1730 if (unlikely(retval)) 1647 }
1731 return retval; 1648
1732 retval = path_walk(name, nd); 1649 if (!err)
1733 if (nd->root.mnt) { 1650 err = handle_reval_path(nd);
1734 path_put(&nd->root); 1651
1735 nd->root.mnt = NULL; 1652 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1653 if (!nd->inode->i_op->lookup) {
1654 path_put(&nd->path);
1655 return -ENOTDIR;
1736 } 1656 }
1737 } 1657 }
1738 1658
1659 if (base)
1660 fput(base);
1661
1662 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1663 path_put(&nd->root);
1664 nd->root.mnt = NULL;
1665 }
1666 return err;
1667}
1668
1669static int do_path_lookup(int dfd, const char *name,
1670 unsigned int flags, struct nameidata *nd)
1671{
1672 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1673 if (unlikely(retval == -ECHILD))
1674 retval = path_lookupat(dfd, name, flags, nd);
1675 if (unlikely(retval == -ESTALE))
1676 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1677
1739 if (likely(!retval)) { 1678 if (likely(!retval)) {
1740 if (unlikely(!audit_dummy_context())) { 1679 if (unlikely(!audit_dummy_context())) {
1741 if (nd->path.dentry && nd->inode) 1680 if (nd->path.dentry && nd->inode)
1742 audit_inode(name, nd->path.dentry); 1681 audit_inode(name, nd->path.dentry);
1743 } 1682 }
1744 } 1683 }
1745
1746 return retval; 1684 return retval;
1747} 1685}
1748 1686
1749int path_lookup(const char *name, unsigned int flags, 1687int kern_path_parent(const char *name, struct nameidata *nd)
1750 struct nameidata *nd)
1751{ 1688{
1752 return do_path_lookup(AT_FDCWD, name, flags, nd); 1689 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1753} 1690}
1754 1691
1755int kern_path(const char *name, unsigned int flags, struct path *path) 1692int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1773,29 +1710,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1773 const char *name, unsigned int flags, 1710 const char *name, unsigned int flags,
1774 struct nameidata *nd) 1711 struct nameidata *nd)
1775{ 1712{
1776 int retval; 1713 nd->root.dentry = dentry;
1777 1714 nd->root.mnt = mnt;
1778 /* same as do_path_lookup */ 1715 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1779 nd->last_type = LAST_ROOT; 1716 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1780 nd->flags = flags;
1781 nd->depth = 0;
1782
1783 nd->path.dentry = dentry;
1784 nd->path.mnt = mnt;
1785 path_get(&nd->path);
1786 nd->root = nd->path;
1787 path_get(&nd->root);
1788 nd->inode = nd->path.dentry->d_inode;
1789
1790 retval = path_walk(name, nd);
1791 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1792 nd->inode))
1793 audit_inode(name, nd->path.dentry);
1794
1795 path_put(&nd->root);
1796 nd->root.mnt = NULL;
1797
1798 return retval;
1799} 1717}
1800 1718
1801static struct dentry *__lookup_hash(struct qstr *name, 1719static struct dentry *__lookup_hash(struct qstr *name,
@@ -1810,17 +1728,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1810 return ERR_PTR(err); 1728 return ERR_PTR(err);
1811 1729
1812 /* 1730 /*
1813 * See if the low-level filesystem might want
1814 * to use its own hash..
1815 */
1816 if (base->d_flags & DCACHE_OP_HASH) {
1817 err = base->d_op->d_hash(base, inode, name);
1818 dentry = ERR_PTR(err);
1819 if (err < 0)
1820 goto out;
1821 }
1822
1823 /*
1824 * Don't bother with __d_lookup: callers are for creat as 1731 * Don't bother with __d_lookup: callers are for creat as
1825 * well as unlink, so a lot of the time it would cost 1732 * well as unlink, so a lot of the time it would cost
1826 * a double lookup. 1733 * a double lookup.
@@ -1832,7 +1739,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1832 1739
1833 if (!dentry) 1740 if (!dentry)
1834 dentry = d_alloc_and_lookup(base, name, nd); 1741 dentry = d_alloc_and_lookup(base, name, nd);
1835out: 1742
1836 return dentry; 1743 return dentry;
1837} 1744}
1838 1745
@@ -1846,28 +1753,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1846 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1753 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1847} 1754}
1848 1755
1849static int __lookup_one_len(const char *name, struct qstr *this,
1850 struct dentry *base, int len)
1851{
1852 unsigned long hash;
1853 unsigned int c;
1854
1855 this->name = name;
1856 this->len = len;
1857 if (!len)
1858 return -EACCES;
1859
1860 hash = init_name_hash();
1861 while (len--) {
1862 c = *(const unsigned char *)name++;
1863 if (c == '/' || c == '\0')
1864 return -EACCES;
1865 hash = partial_name_hash(c, hash);
1866 }
1867 this->hash = end_name_hash(hash);
1868 return 0;
1869}
1870
1871/** 1756/**
1872 * lookup_one_len - filesystem helper to lookup single pathname component 1757 * lookup_one_len - filesystem helper to lookup single pathname component
1873 * @name: pathname component to lookup 1758 * @name: pathname component to lookup
@@ -1881,14 +1766,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1881 */ 1766 */
1882struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1767struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1883{ 1768{
1884 int err;
1885 struct qstr this; 1769 struct qstr this;
1770 unsigned long hash;
1771 unsigned int c;
1886 1772
1887 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1773 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1888 1774
1889 err = __lookup_one_len(name, &this, base, len); 1775 this.name = name;
1890 if (err) 1776 this.len = len;
1891 return ERR_PTR(err); 1777 if (!len)
1778 return ERR_PTR(-EACCES);
1779
1780 hash = init_name_hash();
1781 while (len--) {
1782 c = *(const unsigned char *)name++;
1783 if (c == '/' || c == '\0')
1784 return ERR_PTR(-EACCES);
1785 hash = partial_name_hash(c, hash);
1786 }
1787 this.hash = end_name_hash(hash);
1788 /*
1789 * See if the low-level filesystem might want
1790 * to use its own hash..
1791 */
1792 if (base->d_flags & DCACHE_OP_HASH) {
1793 int err = base->d_op->d_hash(base, base->d_inode, &this);
1794 if (err < 0)
1795 return ERR_PTR(err);
1796 }
1892 1797
1893 return __lookup_hash(&this, base, NULL); 1798 return __lookup_hash(&this, base, NULL);
1894} 1799}
@@ -1897,7 +1802,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1897 struct path *path) 1802 struct path *path)
1898{ 1803{
1899 struct nameidata nd; 1804 struct nameidata nd;
1900 char *tmp = getname(name); 1805 char *tmp = getname_flags(name, flags);
1901 int err = PTR_ERR(tmp); 1806 int err = PTR_ERR(tmp);
1902 if (!IS_ERR(tmp)) { 1807 if (!IS_ERR(tmp)) {
1903 1808
@@ -2077,12 +1982,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2077 return error; 1982 return error;
2078} 1983}
2079 1984
2080int may_open(struct path *path, int acc_mode, int flag) 1985static int may_open(struct path *path, int acc_mode, int flag)
2081{ 1986{
2082 struct dentry *dentry = path->dentry; 1987 struct dentry *dentry = path->dentry;
2083 struct inode *inode = dentry->d_inode; 1988 struct inode *inode = dentry->d_inode;
2084 int error; 1989 int error;
2085 1990
1991 /* O_PATH? */
1992 if (!acc_mode)
1993 return 0;
1994
2086 if (!inode) 1995 if (!inode)
2087 return -ENOENT; 1996 return -ENOENT;
2088 1997
@@ -2151,34 +2060,6 @@ static int handle_truncate(struct file *filp)
2151} 2060}
2152 2061
2153/* 2062/*
2154 * Be careful about ever adding any more callers of this
2155 * function. Its flags must be in the namei format, not
2156 * what get passed to sys_open().
2157 */
2158static int __open_namei_create(struct nameidata *nd, struct path *path,
2159 int open_flag, int mode)
2160{
2161 int error;
2162 struct dentry *dir = nd->path.dentry;
2163
2164 if (!IS_POSIXACL(dir->d_inode))
2165 mode &= ~current_umask();
2166 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2167 if (error)
2168 goto out_unlock;
2169 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2170out_unlock:
2171 mutex_unlock(&dir->d_inode->i_mutex);
2172 dput(nd->path.dentry);
2173 nd->path.dentry = path->dentry;
2174
2175 if (error)
2176 return error;
2177 /* Don't check for write permission, don't truncate */
2178 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2179}
2180
2181/*
2182 * Note that while the flag value (low two bits) for sys_open means: 2063 * Note that while the flag value (low two bits) for sys_open means:
2183 * 00 - read-only 2064 * 00 - read-only
2184 * 01 - write-only 2065 * 01 - write-only
@@ -2202,126 +2083,115 @@ static inline int open_to_namei_flags(int flag)
2202 return flag; 2083 return flag;
2203} 2084}
2204 2085
2205static int open_will_truncate(int flag, struct inode *inode)
2206{
2207 /*
2208 * We'll never write to the fs underlying
2209 * a device file.
2210 */
2211 if (special_file(inode->i_mode))
2212 return 0;
2213 return (flag & O_TRUNC);
2214}
2215
2216static struct file *finish_open(struct nameidata *nd,
2217 int open_flag, int acc_mode)
2218{
2219 struct file *filp;
2220 int will_truncate;
2221 int error;
2222
2223 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2224 if (will_truncate) {
2225 error = mnt_want_write(nd->path.mnt);
2226 if (error)
2227 goto exit;
2228 }
2229 error = may_open(&nd->path, acc_mode, open_flag);
2230 if (error) {
2231 if (will_truncate)
2232 mnt_drop_write(nd->path.mnt);
2233 goto exit;
2234 }
2235 filp = nameidata_to_filp(nd);
2236 if (!IS_ERR(filp)) {
2237 error = ima_file_check(filp, acc_mode);
2238 if (error) {
2239 fput(filp);
2240 filp = ERR_PTR(error);
2241 }
2242 }
2243 if (!IS_ERR(filp)) {
2244 if (will_truncate) {
2245 error = handle_truncate(filp);
2246 if (error) {
2247 fput(filp);
2248 filp = ERR_PTR(error);
2249 }
2250 }
2251 }
2252 /*
2253 * It is now safe to drop the mnt write
2254 * because the filp has had a write taken
2255 * on its behalf.
2256 */
2257 if (will_truncate)
2258 mnt_drop_write(nd->path.mnt);
2259 path_put(&nd->path);
2260 return filp;
2261
2262exit:
2263 path_put(&nd->path);
2264 return ERR_PTR(error);
2265}
2266
2267/* 2086/*
2268 * Handle O_CREAT case for do_filp_open 2087 * Handle the last step of open()
2269 */ 2088 */
2270static struct file *do_last(struct nameidata *nd, struct path *path, 2089static struct file *do_last(struct nameidata *nd, struct path *path,
2271 int open_flag, int acc_mode, 2090 const struct open_flags *op, const char *pathname)
2272 int mode, const char *pathname)
2273{ 2091{
2274 struct dentry *dir = nd->path.dentry; 2092 struct dentry *dir = nd->path.dentry;
2093 struct dentry *dentry;
2094 int open_flag = op->open_flag;
2095 int will_truncate = open_flag & O_TRUNC;
2096 int want_write = 0;
2097 int acc_mode = op->acc_mode;
2275 struct file *filp; 2098 struct file *filp;
2276 int error = -EISDIR; 2099 int error;
2100
2101 nd->flags &= ~LOOKUP_PARENT;
2102 nd->flags |= op->intent;
2277 2103
2278 switch (nd->last_type) { 2104 switch (nd->last_type) {
2279 case LAST_DOTDOT: 2105 case LAST_DOTDOT:
2280 follow_dotdot(nd);
2281 dir = nd->path.dentry;
2282 case LAST_DOT: 2106 case LAST_DOT:
2283 if (need_reval_dot(dir)) { 2107 error = handle_dots(nd, nd->last_type);
2284 int status = d_revalidate(nd->path.dentry, nd); 2108 if (error)
2285 if (!status) 2109 return ERR_PTR(error);
2286 status = -ESTALE;
2287 if (status < 0) {
2288 error = status;
2289 goto exit;
2290 }
2291 }
2292 /* fallthrough */ 2110 /* fallthrough */
2293 case LAST_ROOT: 2111 case LAST_ROOT:
2294 goto exit; 2112 if (nd->flags & LOOKUP_RCU) {
2113 if (nameidata_drop_rcu_last(nd))
2114 return ERR_PTR(-ECHILD);
2115 }
2116 error = handle_reval_path(nd);
2117 if (error)
2118 goto exit;
2119 audit_inode(pathname, nd->path.dentry);
2120 if (open_flag & O_CREAT) {
2121 error = -EISDIR;
2122 goto exit;
2123 }
2124 goto ok;
2295 case LAST_BIND: 2125 case LAST_BIND:
2126 /* can't be RCU mode here */
2127 error = handle_reval_path(nd);
2128 if (error)
2129 goto exit;
2296 audit_inode(pathname, dir); 2130 audit_inode(pathname, dir);
2297 goto ok; 2131 goto ok;
2298 } 2132 }
2299 2133
2134 if (!(open_flag & O_CREAT)) {
2135 int symlink_ok = 0;
2136 if (nd->last.name[nd->last.len])
2137 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2138 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2139 symlink_ok = 1;
2140 /* we _can_ be in RCU mode here */
2141 error = walk_component(nd, path, &nd->last, LAST_NORM,
2142 !symlink_ok);
2143 if (error < 0)
2144 return ERR_PTR(error);
2145 if (error) /* symlink */
2146 return NULL;
2147 /* sayonara */
2148 if (nd->flags & LOOKUP_RCU) {
2149 if (nameidata_drop_rcu_last(nd))
2150 return ERR_PTR(-ECHILD);
2151 }
2152
2153 error = -ENOTDIR;
2154 if (nd->flags & LOOKUP_DIRECTORY) {
2155 if (!nd->inode->i_op->lookup)
2156 goto exit;
2157 }
2158 audit_inode(pathname, nd->path.dentry);
2159 goto ok;
2160 }
2161
2162 /* create side of things */
2163
2164 if (nd->flags & LOOKUP_RCU) {
2165 if (nameidata_drop_rcu_last(nd))
2166 return ERR_PTR(-ECHILD);
2167 }
2168
2169 audit_inode(pathname, dir);
2170 error = -EISDIR;
2300 /* trailing slashes? */ 2171 /* trailing slashes? */
2301 if (nd->last.name[nd->last.len]) 2172 if (nd->last.name[nd->last.len])
2302 goto exit; 2173 goto exit;
2303 2174
2304 mutex_lock(&dir->d_inode->i_mutex); 2175 mutex_lock(&dir->d_inode->i_mutex);
2305 2176
2306 path->dentry = lookup_hash(nd); 2177 dentry = lookup_hash(nd);
2307 path->mnt = nd->path.mnt; 2178 error = PTR_ERR(dentry);
2308 2179 if (IS_ERR(dentry)) {
2309 error = PTR_ERR(path->dentry);
2310 if (IS_ERR(path->dentry)) {
2311 mutex_unlock(&dir->d_inode->i_mutex); 2180 mutex_unlock(&dir->d_inode->i_mutex);
2312 goto exit; 2181 goto exit;
2313 } 2182 }
2314 2183
2315 if (IS_ERR(nd->intent.open.file)) { 2184 path->dentry = dentry;
2316 error = PTR_ERR(nd->intent.open.file); 2185 path->mnt = nd->path.mnt;
2317 goto exit_mutex_unlock;
2318 }
2319 2186
2320 /* Negative dentry, just create the file */ 2187 /* Negative dentry, just create the file */
2321 if (!path->dentry->d_inode) { 2188 if (!dentry->d_inode) {
2189 int mode = op->mode;
2190 if (!IS_POSIXACL(dir->d_inode))
2191 mode &= ~current_umask();
2322 /* 2192 /*
2323 * This write is needed to ensure that a 2193 * This write is needed to ensure that a
2324 * ro->rw transition does not occur between 2194 * rw->ro transition does not occur between
2325 * the time when the file is created and when 2195 * the time when the file is created and when
2326 * a permanent write count is taken through 2196 * a permanent write count is taken through
2327 * the 'struct file' in nameidata_to_filp(). 2197 * the 'struct file' in nameidata_to_filp().
@@ -2329,22 +2199,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2329 error = mnt_want_write(nd->path.mnt); 2199 error = mnt_want_write(nd->path.mnt);
2330 if (error) 2200 if (error)
2331 goto exit_mutex_unlock; 2201 goto exit_mutex_unlock;
2332 error = __open_namei_create(nd, path, open_flag, mode); 2202 want_write = 1;
2333 if (error) { 2203 /* Don't check for write permission, don't truncate */
2334 mnt_drop_write(nd->path.mnt); 2204 open_flag &= ~O_TRUNC;
2335 goto exit; 2205 will_truncate = 0;
2336 } 2206 acc_mode = MAY_OPEN;
2337 filp = nameidata_to_filp(nd); 2207 error = security_path_mknod(&nd->path, dentry, mode, 0);
2338 mnt_drop_write(nd->path.mnt); 2208 if (error)
2339 path_put(&nd->path); 2209 goto exit_mutex_unlock;
2340 if (!IS_ERR(filp)) { 2210 error = vfs_create(dir->d_inode, dentry, mode, nd);
2341 error = ima_file_check(filp, acc_mode); 2211 if (error)
2342 if (error) { 2212 goto exit_mutex_unlock;
2343 fput(filp); 2213 mutex_unlock(&dir->d_inode->i_mutex);
2344 filp = ERR_PTR(error); 2214 dput(nd->path.dentry);
2345 } 2215 nd->path.dentry = dentry;
2346 } 2216 goto common;
2347 return filp;
2348 } 2217 }
2349 2218
2350 /* 2219 /*
@@ -2374,7 +2243,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2374 if (S_ISDIR(nd->inode->i_mode)) 2243 if (S_ISDIR(nd->inode->i_mode))
2375 goto exit; 2244 goto exit;
2376ok: 2245ok:
2377 filp = finish_open(nd, open_flag, acc_mode); 2246 if (!S_ISREG(nd->inode->i_mode))
2247 will_truncate = 0;
2248
2249 if (will_truncate) {
2250 error = mnt_want_write(nd->path.mnt);
2251 if (error)
2252 goto exit;
2253 want_write = 1;
2254 }
2255common:
2256 error = may_open(&nd->path, acc_mode, open_flag);
2257 if (error)
2258 goto exit;
2259 filp = nameidata_to_filp(nd);
2260 if (!IS_ERR(filp)) {
2261 error = ima_file_check(filp, op->acc_mode);
2262 if (error) {
2263 fput(filp);
2264 filp = ERR_PTR(error);
2265 }
2266 }
2267 if (!IS_ERR(filp)) {
2268 if (will_truncate) {
2269 error = handle_truncate(filp);
2270 if (error) {
2271 fput(filp);
2272 filp = ERR_PTR(error);
2273 }
2274 }
2275 }
2276out:
2277 if (want_write)
2278 mnt_drop_write(nd->path.mnt);
2279 path_put(&nd->path);
2378 return filp; 2280 return filp;
2379 2281
2380exit_mutex_unlock: 2282exit_mutex_unlock:
@@ -2382,197 +2284,103 @@ exit_mutex_unlock:
2382exit_dput: 2284exit_dput:
2383 path_put_conditional(path, nd); 2285 path_put_conditional(path, nd);
2384exit: 2286exit:
2385 path_put(&nd->path); 2287 filp = ERR_PTR(error);
2386 return ERR_PTR(error); 2288 goto out;
2387} 2289}
2388 2290
2389/* 2291static struct file *path_openat(int dfd, const char *pathname,
2390 * Note that the low bits of the passed in "open_flag" 2292 struct nameidata *nd, const struct open_flags *op, int flags)
2391 * are not the same as in the local variable "flag". See
2392 * open_to_namei_flags() for more details.
2393 */
2394struct file *do_filp_open(int dfd, const char *pathname,
2395 int open_flag, int mode, int acc_mode)
2396{ 2293{
2294 struct file *base = NULL;
2397 struct file *filp; 2295 struct file *filp;
2398 struct nameidata nd;
2399 int error;
2400 struct path path; 2296 struct path path;
2401 int count = 0; 2297 int error;
2402 int flag = open_to_namei_flags(open_flag);
2403 int flags;
2404
2405 if (!(open_flag & O_CREAT))
2406 mode = 0;
2407
2408 /* Must never be set by userspace */
2409 open_flag &= ~FMODE_NONOTIFY;
2410
2411 /*
2412 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2413 * check for O_DSYNC if the need any syncing at all we enforce it's
2414 * always set instead of having to deal with possibly weird behaviour
2415 * for malicious applications setting only __O_SYNC.
2416 */
2417 if (open_flag & __O_SYNC)
2418 open_flag |= O_DSYNC;
2419
2420 if (!acc_mode)
2421 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2422
2423 /* O_TRUNC implies we need access checks for write permissions */
2424 if (open_flag & O_TRUNC)
2425 acc_mode |= MAY_WRITE;
2426
2427 /* Allow the LSM permission hook to distinguish append
2428 access from general write access. */
2429 if (open_flag & O_APPEND)
2430 acc_mode |= MAY_APPEND;
2431
2432 flags = LOOKUP_OPEN;
2433 if (open_flag & O_CREAT) {
2434 flags |= LOOKUP_CREATE;
2435 if (open_flag & O_EXCL)
2436 flags |= LOOKUP_EXCL;
2437 }
2438 if (open_flag & O_DIRECTORY)
2439 flags |= LOOKUP_DIRECTORY;
2440 if (!(open_flag & O_NOFOLLOW))
2441 flags |= LOOKUP_FOLLOW;
2442 2298
2443 filp = get_empty_filp(); 2299 filp = get_empty_filp();
2444 if (!filp) 2300 if (!filp)
2445 return ERR_PTR(-ENFILE); 2301 return ERR_PTR(-ENFILE);
2446 2302
2447 filp->f_flags = open_flag; 2303 filp->f_flags = op->open_flag;
2448 nd.intent.open.file = filp; 2304 nd->intent.open.file = filp;
2449 nd.intent.open.flags = flag; 2305 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2450 nd.intent.open.create_mode = mode; 2306 nd->intent.open.create_mode = op->mode;
2451
2452 if (open_flag & O_CREAT)
2453 goto creat;
2454 2307
2455 /* !O_CREAT, simple open */ 2308 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2456 error = do_path_lookup(dfd, pathname, flags, &nd);
2457 if (unlikely(error)) 2309 if (unlikely(error))
2458 goto out_filp; 2310 goto out_filp;
2459 error = -ELOOP;
2460 if (!(nd.flags & LOOKUP_FOLLOW)) {
2461 if (nd.inode->i_op->follow_link)
2462 goto out_path;
2463 }
2464 error = -ENOTDIR;
2465 if (nd.flags & LOOKUP_DIRECTORY) {
2466 if (!nd.inode->i_op->lookup)
2467 goto out_path;
2468 }
2469 audit_inode(pathname, nd.path.dentry);
2470 filp = finish_open(&nd, open_flag, acc_mode);
2471 release_open_intent(&nd);
2472 return filp;
2473 2311
2474creat: 2312 current->total_link_count = 0;
2475 /* OK, have to create the file. Find the parent. */ 2313 error = link_path_walk(pathname, nd);
2476 error = path_init_rcu(dfd, pathname,
2477 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2478 if (error)
2479 goto out_filp;
2480 error = path_walk_rcu(pathname, &nd);
2481 path_finish_rcu(&nd);
2482 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2483 /* slower, locked walk */
2484 if (error == -ESTALE) {
2485reval:
2486 flags |= LOOKUP_REVAL;
2487 }
2488 error = path_init(dfd, pathname,
2489 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2490 if (error)
2491 goto out_filp;
2492
2493 error = path_walk_simple(pathname, &nd);
2494 }
2495 if (unlikely(error)) 2314 if (unlikely(error))
2496 goto out_filp; 2315 goto out_filp;
2497 if (unlikely(!audit_dummy_context()))
2498 audit_inode(pathname, nd.path.dentry);
2499 2316
2500 /* 2317 filp = do_last(nd, &path, op, pathname);
2501 * We have the parent and last component.
2502 */
2503 nd.flags = flags;
2504 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2505 while (unlikely(!filp)) { /* trailing symlink */ 2318 while (unlikely(!filp)) { /* trailing symlink */
2506 struct path link = path; 2319 struct path link = path;
2507 struct inode *linki = link.dentry->d_inode;
2508 void *cookie; 2320 void *cookie;
2509 error = -ELOOP; 2321 if (!(nd->flags & LOOKUP_FOLLOW)) {
2510 if (!(nd.flags & LOOKUP_FOLLOW)) 2322 path_put_conditional(&path, nd);
2511 goto exit_dput; 2323 path_put(&nd->path);
2512 if (count++ == 32) 2324 filp = ERR_PTR(-ELOOP);
2513 goto exit_dput; 2325 break;
2514 /*
2515 * This is subtle. Instead of calling do_follow_link() we do
2516 * the thing by hands. The reason is that this way we have zero
2517 * link_count and path_walk() (called from ->follow_link)
2518 * honoring LOOKUP_PARENT. After that we have the parent and
2519 * last component, i.e. we are in the same situation as after
2520 * the first path_walk(). Well, almost - if the last component
2521 * is normal we get its copy stored in nd->last.name and we will
2522 * have to putname() it when we are done. Procfs-like symlinks
2523 * just set LAST_BIND.
2524 */
2525 nd.flags |= LOOKUP_PARENT;
2526 error = security_inode_follow_link(link.dentry, &nd);
2527 if (error)
2528 goto exit_dput;
2529 error = __do_follow_link(&link, &nd, &cookie);
2530 if (unlikely(error)) {
2531 if (!IS_ERR(cookie) && linki->i_op->put_link)
2532 linki->i_op->put_link(link.dentry, &nd, cookie);
2533 /* nd.path had been dropped */
2534 nd.path = link;
2535 goto out_path;
2536 } 2326 }
2537 nd.flags &= ~LOOKUP_PARENT; 2327 nd->flags |= LOOKUP_PARENT;
2538 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2328 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2539 if (linki->i_op->put_link) 2329 error = follow_link(&link, nd, &cookie);
2540 linki->i_op->put_link(link.dentry, &nd, cookie); 2330 if (unlikely(error))
2541 path_put(&link); 2331 filp = ERR_PTR(error);
2332 else
2333 filp = do_last(nd, &path, op, pathname);
2334 put_link(nd, &link, cookie);
2542 } 2335 }
2543out: 2336out:
2544 if (nd.root.mnt) 2337 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2545 path_put(&nd.root); 2338 path_put(&nd->root);
2546 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2339 if (base)
2547 goto reval; 2340 fput(base);
2548 release_open_intent(&nd); 2341 release_open_intent(nd);
2549 return filp; 2342 return filp;
2550 2343
2551exit_dput:
2552 path_put_conditional(&path, &nd);
2553out_path:
2554 path_put(&nd.path);
2555out_filp: 2344out_filp:
2556 filp = ERR_PTR(error); 2345 filp = ERR_PTR(error);
2557 goto out; 2346 goto out;
2558} 2347}
2559 2348
2560/** 2349struct file *do_filp_open(int dfd, const char *pathname,
2561 * filp_open - open file and return file pointer 2350 const struct open_flags *op, int flags)
2562 * 2351{
2563 * @filename: path to open 2352 struct nameidata nd;
2564 * @flags: open flags as per the open(2) second argument 2353 struct file *filp;
2565 * @mode: mode for the new file if O_CREAT is set, else ignored 2354
2566 * 2355 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2567 * This is the helper to open a file from kernelspace if you really 2356 if (unlikely(filp == ERR_PTR(-ECHILD)))
2568 * have to. But in generally you should not do this, so please move 2357 filp = path_openat(dfd, pathname, &nd, op, flags);
2569 * along, nothing to see here.. 2358 if (unlikely(filp == ERR_PTR(-ESTALE)))
2570 */ 2359 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2571struct file *filp_open(const char *filename, int flags, int mode) 2360 return filp;
2361}
2362
2363struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2364 const char *name, const struct open_flags *op, int flags)
2572{ 2365{
2573 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2366 struct nameidata nd;
2367 struct file *file;
2368
2369 nd.root.mnt = mnt;
2370 nd.root.dentry = dentry;
2371
2372 flags |= LOOKUP_ROOT;
2373
2374 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2375 return ERR_PTR(-ELOOP);
2376
2377 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2378 if (unlikely(file == ERR_PTR(-ECHILD)))
2379 file = path_openat(-1, name, &nd, op, flags);
2380 if (unlikely(file == ERR_PTR(-ESTALE)))
2381 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2382 return file;
2574} 2383}
2575EXPORT_SYMBOL(filp_open);
2576 2384
2577/** 2385/**
2578 * lookup_create - lookup a dentry, creating it if it doesn't exist 2386 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3111,7 +2919,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3111 return error; 2919 return error;
3112 2920
3113 mutex_lock(&inode->i_mutex); 2921 mutex_lock(&inode->i_mutex);
3114 error = dir->i_op->link(old_dentry, dir, new_dentry); 2922 /* Make sure we don't allow creating hardlink to an unlinked file */
2923 if (inode->i_nlink == 0)
2924 error = -ENOENT;
2925 else
2926 error = dir->i_op->link(old_dentry, dir, new_dentry);
3115 mutex_unlock(&inode->i_mutex); 2927 mutex_unlock(&inode->i_mutex);
3116 if (!error) 2928 if (!error)
3117 fsnotify_link(dir, inode, new_dentry); 2929 fsnotify_link(dir, inode, new_dentry);
@@ -3133,15 +2945,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3133 struct dentry *new_dentry; 2945 struct dentry *new_dentry;
3134 struct nameidata nd; 2946 struct nameidata nd;
3135 struct path old_path; 2947 struct path old_path;
2948 int how = 0;
3136 int error; 2949 int error;
3137 char *to; 2950 char *to;
3138 2951
3139 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2952 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3140 return -EINVAL; 2953 return -EINVAL;
2954 /*
2955 * To use null names we require CAP_DAC_READ_SEARCH
2956 * This ensures that not everyone will be able to create
2957 * handlink using the passed filedescriptor.
2958 */
2959 if (flags & AT_EMPTY_PATH) {
2960 if (!capable(CAP_DAC_READ_SEARCH))
2961 return -ENOENT;
2962 how = LOOKUP_EMPTY;
2963 }
2964
2965 if (flags & AT_SYMLINK_FOLLOW)
2966 how |= LOOKUP_FOLLOW;
3141 2967
3142 error = user_path_at(olddfd, oldname, 2968 error = user_path_at(olddfd, oldname, how, &old_path);
3143 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3144 &old_path);
3145 if (error) 2969 if (error)
3146 return error; 2970 return error;
3147 2971
@@ -3578,7 +3402,7 @@ EXPORT_SYMBOL(page_readlink);
3578EXPORT_SYMBOL(__page_symlink); 3402EXPORT_SYMBOL(__page_symlink);
3579EXPORT_SYMBOL(page_symlink); 3403EXPORT_SYMBOL(page_symlink);
3580EXPORT_SYMBOL(page_symlink_inode_operations); 3404EXPORT_SYMBOL(page_symlink_inode_operations);
3581EXPORT_SYMBOL(path_lookup); 3405EXPORT_SYMBOL(kern_path_parent);
3582EXPORT_SYMBOL(kern_path); 3406EXPORT_SYMBOL(kern_path);
3583EXPORT_SYMBOL(vfs_path_lookup); 3407EXPORT_SYMBOL(vfs_path_lookup);
3584EXPORT_SYMBOL(inode_permission); 3408EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..dffe6f49ab93 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
1002 .show = show_vfsmnt 1002 .show = show_vfsmnt
1003}; 1003};
1004 1004
1005static int uuid_is_nil(u8 *uuid)
1006{
1007 int i;
1008 u8 *cp = (u8 *)uuid;
1009
1010 for (i = 0; i < 16; i++) {
1011 if (*cp++)
1012 return 0;
1013 }
1014 return 1;
1015}
1016
1005static int show_mountinfo(struct seq_file *m, void *v) 1017static int show_mountinfo(struct seq_file *m, void *v)
1006{ 1018{
1007 struct proc_mounts *p = m->private; 1019 struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
1040 if (IS_MNT_UNBINDABLE(mnt)) 1052 if (IS_MNT_UNBINDABLE(mnt))
1041 seq_puts(m, " unbindable"); 1053 seq_puts(m, " unbindable");
1042 1054
1055 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1056 /* print the uuid */
1057 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1058
1043 /* Filesystem specific data */ 1059 /* Filesystem specific data */
1044 seq_puts(m, " - "); 1060 seq_puts(m, " - ");
1045 show_type(m, sb); 1061 show_type(m, sb);
@@ -1244,7 +1260,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1244 */ 1260 */
1245 br_write_lock(vfsmount_lock); 1261 br_write_lock(vfsmount_lock);
1246 if (mnt_get_count(mnt) != 2) { 1262 if (mnt_get_count(mnt) != 2) {
1247 br_write_lock(vfsmount_lock); 1263 br_write_unlock(vfsmount_lock);
1248 return -EBUSY; 1264 return -EBUSY;
1249 } 1265 }
1250 br_write_unlock(vfsmount_lock); 1266 br_write_unlock(vfsmount_lock);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..2f8e61816d75 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/inet.h> 37#include <linux/inet.h>
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
89 */ 90 */
90u64 nfs_compat_user_ino64(u64 fileid) 91u64 nfs_compat_user_ino64(u64 fileid)
91{ 92{
92 int ino; 93#ifdef CONFIG_COMPAT
94 compat_ulong_t ino;
95#else
96 unsigned long ino;
97#endif
93 98
94 if (enable_ino64) 99 if (enable_ino64)
95 return fileid; 100 return fileid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..1be36cf65bfc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
298#if defined(CONFIG_NFS_V4_1) 298#if defined(CONFIG_NFS_V4_1)
299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
301extern void nfs4_schedule_session_recovery(struct nfs4_session *);
302#else
303static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
304{
305}
301#endif /* CONFIG_NFS_V4_1 */ 306#endif /* CONFIG_NFS_V4_1 */
302 307
303extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 308extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
307extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 312extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
308extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 313extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
309extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 314extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
310extern void nfs4_schedule_state_recovery(struct nfs_client *); 315extern void nfs4_schedule_lease_recovery(struct nfs_client *);
311extern void nfs4_schedule_state_manager(struct nfs_client *); 316extern void nfs4_schedule_state_manager(struct nfs_client *);
312extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 317extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
313extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
314extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 318extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
315extern void nfs41_handle_recall_slot(struct nfs_client *clp); 319extern void nfs41_handle_recall_slot(struct nfs_client *clp);
316extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 320extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..b73c34375f60 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
219 goto out_err; 219 goto out_err;
220 } 220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL); 221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 if (!buf) {
223 dprintk("%s: Not enough memory\n", __func__);
224 goto out_err;
225 }
222 buf[rlen] = '\0'; 226 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen); 227 memcpy(buf, r_addr, rlen);
224 228
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..0a07e353a961 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -256,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
256 case -NFS4ERR_OPENMODE: 256 case -NFS4ERR_OPENMODE:
257 if (state == NULL) 257 if (state == NULL)
258 break; 258 break;
259 nfs4_state_mark_reclaim_nograce(clp, state); 259 nfs4_schedule_stateid_recovery(server, state);
260 goto do_state_recovery; 260 goto wait_on_recovery;
261 case -NFS4ERR_STALE_STATEID: 261 case -NFS4ERR_STALE_STATEID:
262 case -NFS4ERR_STALE_CLIENTID: 262 case -NFS4ERR_STALE_CLIENTID:
263 case -NFS4ERR_EXPIRED: 263 case -NFS4ERR_EXPIRED:
264 goto do_state_recovery; 264 nfs4_schedule_lease_recovery(clp);
265 goto wait_on_recovery;
265#if defined(CONFIG_NFS_V4_1) 266#if defined(CONFIG_NFS_V4_1)
266 case -NFS4ERR_BADSESSION: 267 case -NFS4ERR_BADSESSION:
267 case -NFS4ERR_BADSLOT: 268 case -NFS4ERR_BADSLOT:
@@ -272,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
272 case -NFS4ERR_SEQ_MISORDERED: 273 case -NFS4ERR_SEQ_MISORDERED:
273 dprintk("%s ERROR: %d Reset session\n", __func__, 274 dprintk("%s ERROR: %d Reset session\n", __func__,
274 errorcode); 275 errorcode);
275 nfs4_schedule_state_recovery(clp); 276 nfs4_schedule_session_recovery(clp->cl_session);
276 exception->retry = 1; 277 exception->retry = 1;
277 break; 278 break;
278#endif /* defined(CONFIG_NFS_V4_1) */ 279#endif /* defined(CONFIG_NFS_V4_1) */
@@ -295,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
295 } 296 }
296 /* We failed to handle the error */ 297 /* We failed to handle the error */
297 return nfs4_map_errors(ret); 298 return nfs4_map_errors(ret);
298do_state_recovery: 299wait_on_recovery:
299 nfs4_schedule_state_recovery(clp);
300 ret = nfs4_wait_clnt_recover(clp); 300 ret = nfs4_wait_clnt_recover(clp);
301 if (ret == 0) 301 if (ret == 0)
302 exception->retry = 1; 302 exception->retry = 1;
@@ -435,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
435 clp = res->sr_session->clp; 435 clp = res->sr_session->clp;
436 do_renew_lease(clp, timestamp); 436 do_renew_lease(clp, timestamp);
437 /* Check sequence flags */ 437 /* Check sequence flags */
438 if (atomic_read(&clp->cl_count) > 1) 438 if (res->sr_status_flags != 0)
439 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 439 nfs4_schedule_lease_recovery(clp);
440 break; 440 break;
441 case -NFS4ERR_DELAY: 441 case -NFS4ERR_DELAY:
442 /* The server detected a resend of the RPC call and 442 /* The server detected a resend of the RPC call and
@@ -1255,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1255 case -NFS4ERR_BAD_HIGH_SLOT: 1255 case -NFS4ERR_BAD_HIGH_SLOT:
1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1257 case -NFS4ERR_DEADSESSION: 1257 case -NFS4ERR_DEADSESSION:
1258 nfs4_schedule_state_recovery( 1258 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
1259 server->nfs_client);
1260 goto out; 1259 goto out;
1261 case -NFS4ERR_STALE_CLIENTID: 1260 case -NFS4ERR_STALE_CLIENTID:
1262 case -NFS4ERR_STALE_STATEID: 1261 case -NFS4ERR_STALE_STATEID:
1263 case -NFS4ERR_EXPIRED: 1262 case -NFS4ERR_EXPIRED:
1264 /* Don't recall a delegation if it was lost */ 1263 /* Don't recall a delegation if it was lost */
1265 nfs4_schedule_state_recovery(server->nfs_client); 1264 nfs4_schedule_lease_recovery(server->nfs_client);
1266 goto out; 1265 goto out;
1267 case -ERESTARTSYS: 1266 case -ERESTARTSYS:
1268 /* 1267 /*
@@ -1271,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1271 */ 1270 */
1272 case -NFS4ERR_ADMIN_REVOKED: 1271 case -NFS4ERR_ADMIN_REVOKED:
1273 case -NFS4ERR_BAD_STATEID: 1272 case -NFS4ERR_BAD_STATEID:
1274 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1273 nfs4_schedule_stateid_recovery(server, state);
1275 case -EKEYEXPIRED: 1274 case -EKEYEXPIRED:
1276 /* 1275 /*
1277 * User RPCSEC_GSS context has expired. 1276 * User RPCSEC_GSS context has expired.
@@ -1587,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
1587 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1586 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1588 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1587 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1589 break; 1588 break;
1590 nfs4_schedule_state_recovery(clp); 1589 nfs4_schedule_state_manager(clp);
1591 ret = -EIO; 1590 ret = -EIO;
1592 } 1591 }
1593 return ret; 1592 return ret;
@@ -3178,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3178 if (task->tk_status < 0) { 3177 if (task->tk_status < 0) {
3179 /* Unless we're shutting down, schedule state recovery! */ 3178 /* Unless we're shutting down, schedule state recovery! */
3180 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3179 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
3181 nfs4_schedule_state_recovery(clp); 3180 nfs4_schedule_lease_recovery(clp);
3182 return; 3181 return;
3183 } 3182 }
3184 do_renew_lease(clp, timestamp); 3183 do_renew_lease(clp, timestamp);
@@ -3252,6 +3251,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3252 } 3251 }
3253} 3252}
3254 3253
3254static int buf_to_pages_noslab(const void *buf, size_t buflen,
3255 struct page **pages, unsigned int *pgbase)
3256{
3257 struct page *newpage, **spages;
3258 int rc = 0;
3259 size_t len;
3260 spages = pages;
3261
3262 do {
3263 len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
3264 newpage = alloc_page(GFP_KERNEL);
3265
3266 if (newpage == NULL)
3267 goto unwind;
3268 memcpy(page_address(newpage), buf, len);
3269 buf += len;
3270 buflen -= len;
3271 *pages++ = newpage;
3272 rc++;
3273 } while (buflen != 0);
3274
3275 return rc;
3276
3277unwind:
3278 for(; rc > 0; rc--)
3279 __free_page(spages[rc-1]);
3280 return -ENOMEM;
3281}
3282
3255struct nfs4_cached_acl { 3283struct nfs4_cached_acl {
3256 int cached; 3284 int cached;
3257 size_t len; 3285 size_t len;
@@ -3420,13 +3448,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3420 .rpc_argp = &arg, 3448 .rpc_argp = &arg,
3421 .rpc_resp = &res, 3449 .rpc_resp = &res,
3422 }; 3450 };
3423 int ret; 3451 int ret, i;
3424 3452
3425 if (!nfs4_server_supports_acls(server)) 3453 if (!nfs4_server_supports_acls(server))
3426 return -EOPNOTSUPP; 3454 return -EOPNOTSUPP;
3455 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3456 if (i < 0)
3457 return i;
3427 nfs_inode_return_delegation(inode); 3458 nfs_inode_return_delegation(inode);
3428 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3429 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3459 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3460
3461 /*
3462 * Free each page after tx, so the only ref left is
3463 * held by the network stack
3464 */
3465 for (; i > 0; i--)
3466 put_page(pages[i-1]);
3467
3430 /* 3468 /*
3431 * Acl update can result in inode attribute update. 3469 * Acl update can result in inode attribute update.
3432 * so mark the attribute cache invalid. 3470 * so mark the attribute cache invalid.
@@ -3464,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3464 case -NFS4ERR_OPENMODE: 3502 case -NFS4ERR_OPENMODE:
3465 if (state == NULL) 3503 if (state == NULL)
3466 break; 3504 break;
3467 nfs4_state_mark_reclaim_nograce(clp, state); 3505 nfs4_schedule_stateid_recovery(server, state);
3468 goto do_state_recovery; 3506 goto wait_on_recovery;
3469 case -NFS4ERR_STALE_STATEID: 3507 case -NFS4ERR_STALE_STATEID:
3470 case -NFS4ERR_STALE_CLIENTID: 3508 case -NFS4ERR_STALE_CLIENTID:
3471 case -NFS4ERR_EXPIRED: 3509 case -NFS4ERR_EXPIRED:
3472 goto do_state_recovery; 3510 nfs4_schedule_lease_recovery(clp);
3511 goto wait_on_recovery;
3473#if defined(CONFIG_NFS_V4_1) 3512#if defined(CONFIG_NFS_V4_1)
3474 case -NFS4ERR_BADSESSION: 3513 case -NFS4ERR_BADSESSION:
3475 case -NFS4ERR_BADSLOT: 3514 case -NFS4ERR_BADSLOT:
@@ -3480,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3480 case -NFS4ERR_SEQ_MISORDERED: 3519 case -NFS4ERR_SEQ_MISORDERED:
3481 dprintk("%s ERROR %d, Reset session\n", __func__, 3520 dprintk("%s ERROR %d, Reset session\n", __func__,
3482 task->tk_status); 3521 task->tk_status);
3483 nfs4_schedule_state_recovery(clp); 3522 nfs4_schedule_session_recovery(clp->cl_session);
3484 task->tk_status = 0; 3523 task->tk_status = 0;
3485 return -EAGAIN; 3524 return -EAGAIN;
3486#endif /* CONFIG_NFS_V4_1 */ 3525#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3497 } 3536 }
3498 task->tk_status = nfs4_map_errors(task->tk_status); 3537 task->tk_status = nfs4_map_errors(task->tk_status);
3499 return 0; 3538 return 0;
3500do_state_recovery: 3539wait_on_recovery:
3501 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3540 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3502 nfs4_schedule_state_recovery(clp);
3503 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 3541 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3504 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3542 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3505 task->tk_status = 0; 3543 task->tk_status = 0;
@@ -4110,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
4110 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 4148 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
4111 data->arg.lock_seqid); 4149 data->arg.lock_seqid);
4112 if (!IS_ERR(task)) 4150 if (!IS_ERR(task))
4113 rpc_put_task(task); 4151 rpc_put_task_async(task);
4114 dprintk("%s: cancelling lock!\n", __func__); 4152 dprintk("%s: cancelling lock!\n", __func__);
4115 } else 4153 } else
4116 nfs_free_seqid(data->arg.lock_seqid); 4154 nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
4134 4172
4135static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) 4173static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4136{ 4174{
4137 struct nfs_client *clp = server->nfs_client;
4138 struct nfs4_state *state = lsp->ls_state;
4139
4140 switch (error) { 4175 switch (error) {
4141 case -NFS4ERR_ADMIN_REVOKED: 4176 case -NFS4ERR_ADMIN_REVOKED:
4142 case -NFS4ERR_BAD_STATEID: 4177 case -NFS4ERR_BAD_STATEID:
4143 case -NFS4ERR_EXPIRED: 4178 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4144 if (new_lock_owner != 0 || 4179 if (new_lock_owner != 0 ||
4145 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4180 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4146 nfs4_state_mark_reclaim_nograce(clp, state); 4181 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4147 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4148 break; 4182 break;
4149 case -NFS4ERR_STALE_STATEID: 4183 case -NFS4ERR_STALE_STATEID:
4150 if (new_lock_owner != 0 ||
4151 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4152 nfs4_state_mark_reclaim_reboot(clp, state);
4153 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4184 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4185 case -NFS4ERR_EXPIRED:
4186 nfs4_schedule_lease_recovery(server->nfs_client);
4154 }; 4187 };
4155} 4188}
4156 4189
@@ -4366,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4366 case -NFS4ERR_EXPIRED: 4399 case -NFS4ERR_EXPIRED:
4367 case -NFS4ERR_STALE_CLIENTID: 4400 case -NFS4ERR_STALE_CLIENTID:
4368 case -NFS4ERR_STALE_STATEID: 4401 case -NFS4ERR_STALE_STATEID:
4402 nfs4_schedule_lease_recovery(server->nfs_client);
4403 goto out;
4369 case -NFS4ERR_BADSESSION: 4404 case -NFS4ERR_BADSESSION:
4370 case -NFS4ERR_BADSLOT: 4405 case -NFS4ERR_BADSLOT:
4371 case -NFS4ERR_BAD_HIGH_SLOT: 4406 case -NFS4ERR_BAD_HIGH_SLOT:
4372 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4407 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4373 case -NFS4ERR_DEADSESSION: 4408 case -NFS4ERR_DEADSESSION:
4374 nfs4_schedule_state_recovery(server->nfs_client); 4409 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
4375 goto out; 4410 goto out;
4376 case -ERESTARTSYS: 4411 case -ERESTARTSYS:
4377 /* 4412 /*
@@ -4381,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4381 case -NFS4ERR_ADMIN_REVOKED: 4416 case -NFS4ERR_ADMIN_REVOKED:
4382 case -NFS4ERR_BAD_STATEID: 4417 case -NFS4ERR_BAD_STATEID:
4383 case -NFS4ERR_OPENMODE: 4418 case -NFS4ERR_OPENMODE:
4384 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4419 nfs4_schedule_stateid_recovery(server, state);
4385 err = 0; 4420 err = 0;
4386 goto out; 4421 goto out;
4387 case -EKEYEXPIRED: 4422 case -EKEYEXPIRED:
@@ -4988,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
4988 int status; 5023 int status;
4989 unsigned *ptr; 5024 unsigned *ptr;
4990 struct nfs4_session *session = clp->cl_session; 5025 struct nfs4_session *session = clp->cl_session;
5026 long timeout = 0;
5027 int err;
4991 5028
4992 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5029 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
4993 5030
4994 status = _nfs4_proc_create_session(clp); 5031 do {
5032 status = _nfs4_proc_create_session(clp);
5033 if (status == -NFS4ERR_DELAY) {
5034 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5035 if (err)
5036 status = err;
5037 }
5038 } while (status == -NFS4ERR_DELAY);
5039
4995 if (status) 5040 if (status)
4996 goto out; 5041 goto out;
4997 5042
@@ -5100,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5100 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5145 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5101 return -EAGAIN; 5146 return -EAGAIN;
5102 default: 5147 default:
5103 nfs4_schedule_state_recovery(clp); 5148 nfs4_schedule_lease_recovery(clp);
5104 } 5149 }
5105 return 0; 5150 return 0;
5106} 5151}
@@ -5187,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
5187 if (IS_ERR(task)) 5232 if (IS_ERR(task))
5188 ret = PTR_ERR(task); 5233 ret = PTR_ERR(task);
5189 else 5234 else
5190 rpc_put_task(task); 5235 rpc_put_task_async(task);
5191 dprintk("<-- %s status=%d\n", __func__, ret); 5236 dprintk("<-- %s status=%d\n", __func__, ret);
5192 return ret; 5237 return ret;
5193} 5238}
@@ -5203,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5203 goto out; 5248 goto out;
5204 } 5249 }
5205 ret = rpc_wait_for_completion_task(task); 5250 ret = rpc_wait_for_completion_task(task);
5206 if (!ret) 5251 if (!ret) {
5252 struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
5253
5254 if (task->tk_status == 0)
5255 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
5207 ret = task->tk_status; 5256 ret = task->tk_status;
5257 }
5208 rpc_put_task(task); 5258 rpc_put_task(task);
5209out: 5259out:
5210 dprintk("<-- %s status=%d\n", __func__, ret); 5260 dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5241 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5291 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5242 return -EAGAIN; 5292 return -EAGAIN;
5243 default: 5293 default:
5244 nfs4_schedule_state_recovery(clp); 5294 nfs4_schedule_lease_recovery(clp);
5245 } 5295 }
5246 return 0; 5296 return 0;
5247} 5297}
@@ -5309,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5309 status = PTR_ERR(task); 5359 status = PTR_ERR(task);
5310 goto out; 5360 goto out;
5311 } 5361 }
5362 status = nfs4_wait_for_completion_rpc_task(task);
5363 if (status == 0)
5364 status = task->tk_status;
5312 rpc_put_task(task); 5365 rpc_put_task(task);
5313 return 0; 5366 return 0;
5314out: 5367out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..0592288f9f06 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
1007} 1007}
1008 1008
1009/* 1009/*
1010 * Schedule a state recovery attempt 1010 * Schedule a lease recovery attempt
1011 */ 1011 */
1012void nfs4_schedule_state_recovery(struct nfs_client *clp) 1012void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1013{ 1013{
1014 if (!clp) 1014 if (!clp)
1015 return; 1015 return;
@@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
1018 nfs4_schedule_state_manager(clp); 1018 nfs4_schedule_state_manager(clp);
1019} 1019}
1020 1020
1021int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1021static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1022{ 1022{
1023 1023
1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
1032 return 1; 1032 return 1;
1033} 1033}
1034 1034
1035int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1035static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1036{ 1036{
1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
1041 return 1; 1041 return 1;
1042} 1042}
1043 1043
1044void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1045{
1046 struct nfs_client *clp = server->nfs_client;
1047
1048 nfs4_state_mark_reclaim_nograce(clp, state);
1049 nfs4_schedule_state_manager(clp);
1050}
1051
1044static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1052static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1045{ 1053{
1046 struct inode *inode = state->inode; 1054 struct inode *inode = state->inode;
@@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1436} 1444}
1437 1445
1438#ifdef CONFIG_NFS_V4_1 1446#ifdef CONFIG_NFS_V4_1
1447void nfs4_schedule_session_recovery(struct nfs4_session *session)
1448{
1449 nfs4_schedule_lease_recovery(session->clp);
1450}
1451
1439void nfs41_handle_recall_slot(struct nfs_client *clp) 1452void nfs41_handle_recall_slot(struct nfs_client *clp)
1440{ 1453{
1441 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1454 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1442 nfs4_schedule_state_recovery(clp); 1455 nfs4_schedule_state_manager(clp);
1443} 1456}
1444 1457
1445static void nfs4_reset_all_state(struct nfs_client *clp) 1458static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
1447 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1460 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1448 clp->cl_boot_time = CURRENT_TIME; 1461 clp->cl_boot_time = CURRENT_TIME;
1449 nfs4_state_start_reclaim_nograce(clp); 1462 nfs4_state_start_reclaim_nograce(clp);
1450 nfs4_schedule_state_recovery(clp); 1463 nfs4_schedule_state_manager(clp);
1451 } 1464 }
1452} 1465}
1453 1466
@@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1455{ 1468{
1456 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1469 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1457 nfs4_state_start_reclaim_reboot(clp); 1470 nfs4_state_start_reclaim_reboot(clp);
1458 nfs4_schedule_state_recovery(clp); 1471 nfs4_schedule_state_manager(clp);
1459 } 1472 }
1460} 1473}
1461 1474
@@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1475{ 1488{
1476 nfs_expire_all_delegations(clp); 1489 nfs_expire_all_delegations(clp);
1477 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1490 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1478 nfs4_schedule_state_recovery(clp); 1491 nfs4_schedule_state_manager(clp);
1479} 1492}
1480 1493
1481void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1494void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..94d50e86a124 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1660 1660
1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1662 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1662 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1663 p = xdr_encode_hyper(p, clp->cl_ex_clid); 1663 p = xdr_encode_hyper(p, clp->cl_clientid);
1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1665 *p++ = cpu_to_be32(args->flags); /*flags */ 1665 *p++ = cpu_to_be32(args->flags); /*flags */
1666 1666
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4694 p = xdr_inline_decode(xdr, 8); 4694 p = xdr_inline_decode(xdr, 8);
4695 if (unlikely(!p)) 4695 if (unlikely(!p))
4696 goto out_overflow; 4696 goto out_overflow;
4697 xdr_decode_hyper(p, &clp->cl_ex_clid); 4697 xdr_decode_hyper(p, &clp->cl_clientid);
4698 p = xdr_inline_decode(xdr, 12); 4698 p = xdr_inline_decode(xdr, 12);
4699 if (unlikely(!p)) 4699 if (unlikely(!p))
4700 goto out_overflow; 4700 goto out_overflow;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
86/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
87#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
88 88
89/* Default NFSROOT mount options. */
90#define NFS_DEF_OPTIONS "udp"
91
89/* Parameters passed from the kernel command line */ 92/* Parameters passed from the kernel command line */
90static char nfs_root_parms[256] __initdata = ""; 93static char nfs_root_parms[256] __initdata = "";
91 94
92/* Text-based mount options passed to super.c */ 95/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = ""; 96static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
94 97
95/* Address of NFS server */ 98/* Address of NFS server */
96static __be32 servaddr __initdata = htonl(INADDR_NONE); 99static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
160} 163}
161 164
162static int __init root_nfs_cat(char *dest, const char *src, 165static int __init root_nfs_cat(char *dest, const char *src,
163 const size_t destlen) 166 const size_t destlen)
164{ 167{
168 size_t len = strlen(dest);
169
170 if (len && dest[len - 1] != ',')
171 if (strlcat(dest, ",", destlen) > destlen)
172 return -1;
173
165 if (strlcat(dest, src, destlen) > destlen) 174 if (strlcat(dest, src, destlen) > destlen)
166 return -1; 175 return -1;
167 return 0; 176 return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
194 if (root_nfs_cat(nfs_root_options, incoming, 203 if (root_nfs_cat(nfs_root_options, incoming,
195 sizeof(nfs_root_options))) 204 sizeof(nfs_root_options)))
196 return -1; 205 return -1;
197
198 /*
199 * Possibly prepare for more options to be appended
200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
206
207 return 0; 206 return 0;
208} 207}
209 208
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
217 */ 216 */
218static int __init root_nfs_data(char *cmdline) 217static int __init root_nfs_data(char *cmdline)
219{ 218{
220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; 219 char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
221 int len, retval = -1; 220 int len, retval = -1;
222 char *tmp = NULL; 221 char *tmp = NULL;
223 const size_t tmplen = sizeof(nfs_export_path); 222 const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
244 * Append mandatory options for nfsroot so they override 243 * Append mandatory options for nfsroot so they override
245 * what has come before 244 * what has come before
246 */ 245 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", 246 snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
248 &servaddr); 247 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option, 248 if (root_nfs_cat(nfs_root_options, mand_options,
250 sizeof(nfs_root_options))) 249 sizeof(nfs_root_options)))
251 goto out_optionstoolong; 250 goto out_optionstoolong;
252 251
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..6481d537d69d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
180 task_setup_data.rpc_client = NFS_CLIENT(dir); 180 task_setup_data.rpc_client = NFS_CLIENT(dir);
181 task = rpc_run_task(&task_setup_data); 181 task = rpc_run_task(&task_setup_data);
182 if (!IS_ERR(task)) 182 if (!IS_ERR(task))
183 rpc_put_task(task); 183 rpc_put_task_async(task);
184 return 1; 184 return 1;
185} 185}
186 186
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..42b92d7a9cc4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1292 task = rpc_run_task(&task_setup_data); 1292 task = rpc_run_task(&task_setup_data);
1293 if (IS_ERR(task)) 1293 if (IS_ERR(task))
1294 return PTR_ERR(task); 1294 return PTR_ERR(task);
1295 if (how & FLUSH_SYNC)
1296 rpc_wait_for_completion_task(task);
1295 rpc_put_task(task); 1297 rpc_put_task(task);
1296 return 0; 1298 return 0;
1297} 1299}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242ddd..124e8fcb0dd6 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
22 22
23static struct file *do_open(char *name, int flags) 23static struct file *do_open(char *name, int flags)
24{ 24{
25 struct nameidata nd;
26 struct vfsmount *mnt; 25 struct vfsmount *mnt;
27 int error; 26 struct file *file;
28 27
29 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
30 if (IS_ERR(mnt)) 29 if (IS_ERR(mnt))
31 return (struct file *)mnt; 30 return (struct file *)mnt;
32 31
33 error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 file = file_open_root(mnt->mnt_root, mnt, name, flags);
34 mntput(mnt); /* drop do_kern_mount reference */
35 if (error)
36 return ERR_PTR(error);
37
38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 else
41 error = may_open(&nd.path, MAY_WRITE, flags);
42 33
43 if (!error) 34 mntput(mnt); /* drop do_kern_mount reference */
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 35 return file;
45 current_cred());
46
47 path_put(&nd.path);
48 return ERR_PTR(error);
49} 36}
50 37
51static struct { 38static struct {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cde36cb0f348..02eb4edf0ece 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
432 * If the server returns different values for sessionID, slotID or 432 * If the server returns different values for sessionID, slotID or
433 * sequence number, the server is looney tunes. 433 * sequence number, the server is looney tunes.
434 */ 434 */
435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4); 435 p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
436 if (unlikely(p == NULL)) 436 if (unlikely(p == NULL))
437 goto out_overflow; 437 goto out_overflow;
438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 438 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 54b60bfceb8d..7b566ec14e18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2445,15 +2445,16 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
2445static struct nfs4_delegation * 2445static struct nfs4_delegation *
2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid) 2446find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
2447{ 2447{
2448 struct nfs4_delegation *dp = NULL; 2448 struct nfs4_delegation *dp;
2449 2449
2450 spin_lock(&recall_lock); 2450 spin_lock(&recall_lock);
2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) { 2451 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) 2452 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
2453 break; 2453 spin_unlock(&recall_lock);
2454 } 2454 return dp;
2455 }
2455 spin_unlock(&recall_lock); 2456 spin_unlock(&recall_lock);
2456 return dp; 2457 return NULL;
2457} 2458}
2458 2459
2459int share_access_to_flags(u32 share_access) 2460int share_access_to_flags(u32 share_access)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1275b8655070..615f0a9f0600 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1142 1142
1143 u32 dummy; 1143 u32 dummy;
1144 char *machine_name; 1144 char *machine_name;
1145 int i; 1145 int i, j;
1146 int nr_secflavs; 1146 int nr_secflavs;
1147 1147
1148 READ_BUF(16); 1148 READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
1215 READ_BUF(4); 1215 READ_BUF(4);
1216 READ32(dummy); 1216 READ32(dummy);
1217 READ_BUF(dummy * 4); 1217 READ_BUF(dummy * 4);
1218 for (i = 0; i < dummy; ++i) 1218 for (j = 0; j < dummy; ++j)
1219 READ32(dummy); 1219 READ32(dummy);
1220 break; 1220 break;
1221 case RPC_AUTH_GSS: 1221 case RPC_AUTH_GSS:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f5286..85f7baa15f5d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
35#include "btnode.h" 35#include "btnode.h"
36 36
37 37
38void nilfs_btnode_cache_init_once(struct address_space *btnc)
39{
40 nilfs_mapping_init_once(btnc);
41}
42
43static const struct address_space_operations def_btnode_aops = { 38static const struct address_space_operations def_btnode_aops = {
44 .sync_page = block_sync_page, 39 .sync_page = block_sync_page,
45}; 40};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 41void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f60..a0babd2bff6a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
454 struct backing_dev_info *bdi = inode->i_sb->s_bdi; 454 struct backing_dev_info *bdi = inode->i_sb->s_bdi;
455 455
456 INIT_LIST_HEAD(&shadow->frozen_buffers); 456 INIT_LIST_HEAD(&shadow->frozen_buffers);
457 nilfs_mapping_init_once(&shadow->frozen_data); 457 address_space_init_once(&shadow->frozen_data);
458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); 458 nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
459 nilfs_mapping_init_once(&shadow->frozen_btnodes); 459 address_space_init_once(&shadow->frozen_btnodes);
460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); 460 nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
461 mi->mi_shadow = shadow; 461 mi->mi_shadow = shadow;
462 return 0; 462 return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..161791d26458 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); 397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
398 if (!new_de) 398 if (!new_de)
399 goto out_dir; 399 goto out_dir;
400 inc_nlink(old_inode);
401 nilfs_set_link(new_dir, new_de, new_page, old_inode); 400 nilfs_set_link(new_dir, new_de, new_page, old_inode);
402 nilfs_mark_inode_dirty(new_dir); 401 nilfs_mark_inode_dirty(new_dir);
403 new_inode->i_ctime = CURRENT_TIME; 402 new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
411 if (new_dir->i_nlink >= NILFS_LINK_MAX) 410 if (new_dir->i_nlink >= NILFS_LINK_MAX)
412 goto out_dir; 411 goto out_dir;
413 } 412 }
414 inc_nlink(old_inode);
415 err = nilfs_add_link(new_dentry, old_inode); 413 err = nilfs_add_link(new_dentry, old_inode);
416 if (err) { 414 if (err)
417 drop_nlink(old_inode);
418 nilfs_mark_inode_dirty(old_inode);
419 goto out_dir; 415 goto out_dir;
420 }
421 if (dir_de) { 416 if (dir_de) {
422 inc_nlink(new_dir); 417 inc_nlink(new_dir);
423 nilfs_mark_inode_dirty(new_dir); 418 nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
431 old_inode->i_ctime = CURRENT_TIME; 426 old_inode->i_ctime = CURRENT_TIME;
432 427
433 nilfs_delete_entry(old_de, old_page); 428 nilfs_delete_entry(old_de, old_page);
434 drop_nlink(old_inode);
435 429
436 if (dir_de) { 430 if (dir_de) {
437 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfef..a585b35fd6bc 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 492 return nc;
493} 493}
494 494
495void nilfs_mapping_init_once(struct address_space *mapping)
496{
497 memset(mapping, 0, sizeof(*mapping));
498 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
499 spin_lock_init(&mapping->tree_lock);
500 INIT_LIST_HEAD(&mapping->private_list);
501 spin_lock_init(&mapping->private_lock);
502
503 spin_lock_init(&mapping->i_mmap_lock);
504 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
505 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
506}
507
508void nilfs_mapping_init(struct address_space *mapping, 495void nilfs_mapping_init(struct address_space *mapping,
509 struct backing_dev_info *bdi, 496 struct backing_dev_info *bdi,
510 const struct address_space_operations *aops) 497 const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd891..2a00953ebd5f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 62void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 63void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init_once(struct address_space *mapping);
65void nilfs_mapping_init(struct address_space *mapping, 64void nilfs_mapping_init(struct address_space *mapping,
66 struct backing_dev_info *bdi, 65 struct backing_dev_info *bdi,
67 const struct address_space_operations *aops); 66 const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..2de9f636792a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
430 nilfs_segctor_map_segsum_entry( 430 nilfs_segctor_map_segsum_entry(
431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); 431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
432 432
433 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 433 if (NILFS_I(inode)->i_root &&
434 !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
434 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 435 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
435 /* skip finfo */ 436 /* skip finfo */
436} 437}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e1..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
1279#ifdef CONFIG_NILFS_XATTR 1279#ifdef CONFIG_NILFS_XATTR
1280 init_rwsem(&ii->xattr_sem); 1280 init_rwsem(&ii->xattr_sem);
1281#endif 1281#endif
1282 nilfs_btnode_cache_init_once(&ii->i_btnode_cache); 1282 address_space_init_once(&ii->i_btnode_cache);
1283 ii->i_bmap = &ii->i_bmap_data; 1283 ii->i_bmap = &ii->i_bmap_data;
1284 inode_init_once(&ii->vfs_inode); 1284 inode_init_once(&ii->vfs_inode);
1285} 1285}
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834f..7eb90403fc8a 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb; 57 struct ocfs2_super *osb;
58 58
59 if (nd->flags & LOOKUP_RCU) 59 if (nd && nd->flags & LOOKUP_RCU)
60 return -ECHILD; 60 return -ECHILD;
61 61
62 inode = dentry->d_inode; 62 inode = dentry->d_inode;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4fd..254652a9b542 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
197 dentry->d_name.len, dentry->d_name.name, 197 dentry->d_name.len, dentry->d_name.name,
198 fh, len, connectable); 198 fh, len, connectable);
199 199
200 if (len < 3 || (connectable && len < 6)) { 200 if (connectable && (len < 6)) {
201 mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 201 *max_len = 6;
202 type = 255;
203 goto bail;
204 } else if (len < 3) {
205 *max_len = 3;
202 type = 255; 206 type = 255;
203 goto bail; 207 goto bail;
204 } 208 }
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
405 ocfs2_quota_trans_credits(sb); 405 ocfs2_quota_trans_credits(sb);
406} 406}
407 407
408/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 408/* data block for new dir/symlink, allocation of directory block, dx_root
409 * bitmap block for the new bit) dx_root update for free list */ 409 * update for free list */
410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1) 410#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
411 411
412static inline int ocfs2_add_dir_index_credits(struct super_block *sb) 412static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
413{ 413{
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..29623da133cc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3228 u32 num_clusters, unsigned int e_flags) 3228 u32 num_clusters, unsigned int e_flags)
3229{ 3229{
3230 int ret, delete, index, credits = 0; 3230 int ret, delete, index, credits = 0;
3231 u32 new_bit, new_len; 3231 u32 new_bit, new_len, orig_num_clusters;
3232 unsigned int set_len; 3232 unsigned int set_len;
3233 struct ocfs2_super *osb = OCFS2_SB(sb); 3233 struct ocfs2_super *osb = OCFS2_SB(sb);
3234 handle_t *handle; 3234 handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3261 goto out; 3261 goto out;
3262 } 3262 }
3263 3263
3264 orig_num_clusters = num_clusters;
3265
3264 while (num_clusters) { 3266 while (num_clusters) {
3265 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh, 3267 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3266 p_cluster, num_clusters, 3268 p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3348 * in write-back mode. 3350 * in write-back mode.
3349 */ 3351 */
3350 if (context->get_clusters == ocfs2_di_get_clusters) { 3352 if (context->get_clusters == ocfs2_di_get_clusters) {
3351 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); 3353 ret = ocfs2_cow_sync_writeback(sb, context, cpos,
3354 orig_num_clusters);
3352 if (ret) 3355 if (ret)
3353 mlog_errno(ret); 3356 mlog_errno(ret);
3354 } 3357 }
@@ -4376,7 +4379,7 @@ static int ocfs2_user_path_parent(const char __user *path,
4376 if (IS_ERR(s)) 4379 if (IS_ERR(s))
4377 return PTR_ERR(s); 4380 return PTR_ERR(s);
4378 4381
4379 error = path_lookup(s, LOOKUP_PARENT, nd); 4382 error = kern_path_parent(s, nd);
4380 if (error) 4383 if (error)
4381 putname(s); 4384 putname(s);
4382 else 4385 else
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..36c423fb0635 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1316 struct mount_options *mopt, 1316 struct mount_options *mopt,
1317 int is_remount) 1317 int is_remount)
1318{ 1318{
1319 int status; 1319 int status, user_stack = 0;
1320 char *p; 1320 char *p;
1321 u32 tmp; 1321 u32 tmp;
1322 1322
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
1459 memcpy(mopt->cluster_stack, args[0].from, 1459 memcpy(mopt->cluster_stack, args[0].from,
1460 OCFS2_STACK_LABEL_LEN); 1460 OCFS2_STACK_LABEL_LEN);
1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 1461 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
1462 /*
1463 * Open code the memcmp here as we don't have
1464 * an osb to pass to
1465 * ocfs2_userspace_stack().
1466 */
1467 if (memcmp(mopt->cluster_stack,
1468 OCFS2_CLASSIC_CLUSTER_STACK,
1469 OCFS2_STACK_LABEL_LEN))
1470 user_stack = 1;
1462 break; 1471 break;
1463 case Opt_inode64: 1472 case Opt_inode64:
1464 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1473 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
1514 } 1523 }
1515 } 1524 }
1516 1525
1517 /* Ensure only one heartbeat mode */ 1526 if (user_stack == 0) {
1518 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | 1527 /* Ensure only one heartbeat mode */
1519 OCFS2_MOUNT_HB_NONE); 1528 tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
1520 if (hweight32(tmp) != 1) { 1529 OCFS2_MOUNT_HB_GLOBAL |
1521 mlog(ML_ERROR, "Invalid heartbeat mount options\n"); 1530 OCFS2_MOUNT_HB_NONE);
1522 status = 0; 1531 if (hweight32(tmp) != 1) {
1523 goto bail; 1532 mlog(ML_ERROR, "Invalid heartbeat mount options\n");
1533 status = 0;
1534 goto bail;
1535 }
1524 } 1536 }
1525 1537
1526 status = 1; 1538 status = 1;
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b5..3cac0bda46df 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
233 233
234 if (!(file->f_mode & FMODE_WRITE)) 234 if (!(file->f_mode & FMODE_WRITE))
235 return -EBADF; 235 return -EBADF;
236
237 /* It's not possible punch hole on append only file */
238 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
239 return -EPERM;
240
241 if (IS_IMMUTABLE(inode))
242 return -EPERM;
243
236 /* 244 /*
237 * Revalidate the write permissions, in case security policy has 245 * Revalidate the write permissions, in case security policy has
238 * changed since the files were opened. 246 * changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
565{ 573{
566 struct path path; 574 struct path path;
567 int error = -EINVAL; 575 int error = -EINVAL;
568 int follow; 576 int lookup_flags;
569 577
570 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
571 goto out; 579 goto out;
572 580
573 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 581 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
574 error = user_path_at(dfd, filename, follow, &path); 582 if (flag & AT_EMPTY_PATH)
583 lookup_flags |= LOOKUP_EMPTY;
584 error = user_path_at(dfd, filename, lookup_flags, &path);
575 if (error) 585 if (error)
576 goto out; 586 goto out;
577 error = mnt_want_write(path.mnt); 587 error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
661 int (*open)(struct inode *, struct file *), 671 int (*open)(struct inode *, struct file *),
662 const struct cred *cred) 672 const struct cred *cred)
663{ 673{
674 static const struct file_operations empty_fops = {};
664 struct inode *inode; 675 struct inode *inode;
665 int error; 676 int error;
666 677
667 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
668 FMODE_PREAD | FMODE_PWRITE; 679 FMODE_PREAD | FMODE_PWRITE;
680
681 if (unlikely(f->f_flags & O_PATH))
682 f->f_mode = FMODE_PATH;
683
669 inode = dentry->d_inode; 684 inode = dentry->d_inode;
670 if (f->f_mode & FMODE_WRITE) { 685 if (f->f_mode & FMODE_WRITE) {
671 error = __get_file_write_access(inode, mnt); 686 error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
679 f->f_path.dentry = dentry; 694 f->f_path.dentry = dentry;
680 f->f_path.mnt = mnt; 695 f->f_path.mnt = mnt;
681 f->f_pos = 0; 696 f->f_pos = 0;
682 f->f_op = fops_get(inode->i_fop);
683 file_sb_list_add(f, inode->i_sb); 697 file_sb_list_add(f, inode->i_sb);
684 698
699 if (unlikely(f->f_mode & FMODE_PATH)) {
700 f->f_op = &empty_fops;
701 return f;
702 }
703
704 f->f_op = fops_get(inode->i_fop);
705
685 error = security_dentry_open(f, cred); 706 error = security_dentry_open(f, cred);
686 if (error) 707 if (error)
687 goto cleanup_all; 708 goto cleanup_all;
@@ -882,15 +903,110 @@ void fd_install(unsigned int fd, struct file *file)
882 903
883EXPORT_SYMBOL(fd_install); 904EXPORT_SYMBOL(fd_install);
884 905
906static inline int build_open_flags(int flags, int mode, struct open_flags *op)
907{
908 int lookup_flags = 0;
909 int acc_mode;
910
911 if (!(flags & O_CREAT))
912 mode = 0;
913 op->mode = mode;
914
915 /* Must never be set by userspace */
916 flags &= ~FMODE_NONOTIFY;
917
918 /*
919 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
920 * check for O_DSYNC if the need any syncing at all we enforce it's
921 * always set instead of having to deal with possibly weird behaviour
922 * for malicious applications setting only __O_SYNC.
923 */
924 if (flags & __O_SYNC)
925 flags |= O_DSYNC;
926
927 /*
928 * If we have O_PATH in the open flag. Then we
929 * cannot have anything other than the below set of flags
930 */
931 if (flags & O_PATH) {
932 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
933 acc_mode = 0;
934 } else {
935 acc_mode = MAY_OPEN | ACC_MODE(flags);
936 }
937
938 op->open_flag = flags;
939
940 /* O_TRUNC implies we need access checks for write permissions */
941 if (flags & O_TRUNC)
942 acc_mode |= MAY_WRITE;
943
944 /* Allow the LSM permission hook to distinguish append
945 access from general write access. */
946 if (flags & O_APPEND)
947 acc_mode |= MAY_APPEND;
948
949 op->acc_mode = acc_mode;
950
951 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
952
953 if (flags & O_CREAT) {
954 op->intent |= LOOKUP_CREATE;
955 if (flags & O_EXCL)
956 op->intent |= LOOKUP_EXCL;
957 }
958
959 if (flags & O_DIRECTORY)
960 lookup_flags |= LOOKUP_DIRECTORY;
961 if (!(flags & O_NOFOLLOW))
962 lookup_flags |= LOOKUP_FOLLOW;
963 return lookup_flags;
964}
965
966/**
967 * filp_open - open file and return file pointer
968 *
969 * @filename: path to open
970 * @flags: open flags as per the open(2) second argument
971 * @mode: mode for the new file if O_CREAT is set, else ignored
972 *
973 * This is the helper to open a file from kernelspace if you really
974 * have to. But in generally you should not do this, so please move
975 * along, nothing to see here..
976 */
977struct file *filp_open(const char *filename, int flags, int mode)
978{
979 struct open_flags op;
980 int lookup = build_open_flags(flags, mode, &op);
981 return do_filp_open(AT_FDCWD, filename, &op, lookup);
982}
983EXPORT_SYMBOL(filp_open);
984
985struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
986 const char *filename, int flags)
987{
988 struct open_flags op;
989 int lookup = build_open_flags(flags, 0, &op);
990 if (flags & O_CREAT)
991 return ERR_PTR(-EINVAL);
992 if (!filename && (flags & O_DIRECTORY))
993 if (!dentry->d_inode->i_op->lookup)
994 return ERR_PTR(-ENOTDIR);
995 return do_file_open_root(dentry, mnt, filename, &op, lookup);
996}
997EXPORT_SYMBOL(file_open_root);
998
885long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 999long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
886{ 1000{
1001 struct open_flags op;
1002 int lookup = build_open_flags(flags, mode, &op);
887 char *tmp = getname(filename); 1003 char *tmp = getname(filename);
888 int fd = PTR_ERR(tmp); 1004 int fd = PTR_ERR(tmp);
889 1005
890 if (!IS_ERR(tmp)) { 1006 if (!IS_ERR(tmp)) {
891 fd = get_unused_fd_flags(flags); 1007 fd = get_unused_fd_flags(flags);
892 if (fd >= 0) { 1008 if (fd >= 0) {
893 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 1009 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
894 if (IS_ERR(f)) { 1010 if (IS_ERR(f)) {
895 put_unused_fd(fd); 1011 put_unused_fd(fd);
896 fd = PTR_ERR(f); 1012 fd = PTR_ERR(f);
@@ -960,8 +1076,10 @@ int filp_close(struct file *filp, fl_owner_t id)
960 if (filp->f_op && filp->f_op->flush) 1076 if (filp->f_op && filp->f_op->flush)
961 retval = filp->f_op->flush(filp, id); 1077 retval = filp->f_op->flush(filp, id);
962 1078
963 dnotify_flush(filp, id); 1079 if (likely(!(filp->f_mode & FMODE_PATH))) {
964 locks_remove_posix(filp, id); 1080 dnotify_flush(filp, id);
1081 locks_remove_posix(filp, id);
1082 }
965 fput(filp); 1083 fput(filp);
966 return retval; 1084 return retval;
967} 1085}
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
251 } 251 }
252 252
253 vm->vblk_size = get_unaligned_be32(data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 if (vm->vblk_size == 0) {
255 ldm_error ("Illegal VBLK size");
256 return false;
257 }
258
254 vm->vblk_offset = get_unaligned_be32(data + 0x0C); 259 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04); 260 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 261
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca17..be03a0b08b47 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
10#include "check.h" 10#include "check.h"
11#include "osf.h" 11#include "osf.h"
12 12
13#define MAX_OSF_PARTITIONS 8
14
13int osf_partition(struct parsed_partitions *state) 15int osf_partition(struct parsed_partitions *state)
14{ 16{
15 int i; 17 int i;
16 int slot = 1; 18 int slot = 1;
19 unsigned int npartitions;
17 Sector sect; 20 Sector sect;
18 unsigned char *data; 21 unsigned char *data;
19 struct disklabel { 22 struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
45 u8 p_fstype; 48 u8 p_fstype;
46 u8 p_frag; 49 u8 p_frag;
47 __le16 p_cpg; 50 __le16 p_cpg;
48 } d_partitions[8]; 51 } d_partitions[MAX_OSF_PARTITIONS];
49 } * label; 52 } * label;
50 struct d_partition * partition; 53 struct d_partition * partition;
51 54
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
63 put_dev_sector(sect); 66 put_dev_sector(sect);
64 return 0; 67 return 0;
65 } 68 }
66 for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) { 69 npartitions = le16_to_cpu(label->d_npartitions);
70 if (npartitions > MAX_OSF_PARTITIONS) {
71 put_dev_sector(sect);
72 return 0;
73 }
74 for (i = 0 ; i < npartitions; i++, partition++) {
67 if (slot == state->limit) 75 if (slot == state->limit)
68 break; 76 break;
69 if (le32_to_cpu(partition->p_size)) 77 if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b201..d49c4b5d2c3e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
2620 &proc_self_inode_operations, NULL, {}), 2620 &proc_self_inode_operations, NULL, {}),
2621}; 2621};
2622 2622
2623/*
2624 * Exceptional case: normally we are not allowed to unhash a busy
2625 * directory. In this case, however, we can do it - no aliasing problems
2626 * due to the way we treat inodes.
2627 */
2628static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2629{
2630 struct inode *inode;
2631 struct task_struct *task;
2632
2633 if (nd->flags & LOOKUP_RCU)
2634 return -ECHILD;
2635
2636 inode = dentry->d_inode;
2637 task = get_proc_task(inode);
2638 if (task) {
2639 put_task_struct(task);
2640 return 1;
2641 }
2642 d_drop(dentry);
2643 return 0;
2644}
2645
2646static const struct dentry_operations proc_base_dentry_operations =
2647{
2648 .d_revalidate = proc_base_revalidate,
2649 .d_delete = pid_delete_dentry,
2650};
2651
2652static struct dentry *proc_base_instantiate(struct inode *dir, 2623static struct dentry *proc_base_instantiate(struct inode *dir,
2653 struct dentry *dentry, struct task_struct *task, const void *ptr) 2624 struct dentry *dentry, struct task_struct *task, const void *ptr)
2654{ 2625{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2685 if (p->fop) 2656 if (p->fop)
2686 inode->i_fop = p->fop; 2657 inode->i_fop = p->fop;
2687 ei->op = p->op; 2658 ei->op = p->op;
2688 d_set_d_op(dentry, &proc_base_dentry_operations);
2689 d_add(dentry, inode); 2659 d_add(dentry, inode);
2690 error = NULL; 2660 error = NULL;
2691out: 2661out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68a..d6a7ca1fdac5 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
27static void proc_evict_inode(struct inode *inode) 27static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head;
30 31
31 truncate_inode_pages(&inode->i_data, 0); 32 truncate_inode_pages(&inode->i_data, 0);
32 end_writeback(inode); 33 end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
38 de = PROC_I(inode)->pde; 39 de = PROC_I(inode)->pde;
39 if (de) 40 if (de)
40 pde_put(de); 41 pde_put(de);
41 if (PROC_I(inode)->sysctl) 42 head = PROC_I(inode)->sysctl;
42 sysctl_head_put(PROC_I(inode)->sysctl); 43 if (head) {
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head);
46 }
43} 47}
44 48
45struct vfsmount *proc_mnt; 49struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
233 return; 233 return;
234 root = of_find_node_by_path("/"); 234 root = of_find_node_by_path("/");
235 if (root == NULL) { 235 if (root == NULL) {
236 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 236 pr_debug("/proc/device-tree: can't find root\n");
237 return; 237 return;
238 } 238 }
239 proc_device_tree_add_node(root, proc_device_tree); 239 proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34ef..8eb2522111c5 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -408,15 +408,18 @@ static int proc_sys_compare(const struct dentry *parent,
408 const struct dentry *dentry, const struct inode *inode, 408 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name) 409 unsigned int len, const char *str, const struct qstr *name)
410{ 410{
411 struct ctl_table_header *head;
411 /* Although proc doesn't have negative dentries, rcu-walk means 412 /* Although proc doesn't have negative dentries, rcu-walk means
412 * that inode here can be NULL */ 413 * that inode here can be NULL */
414 /* AV: can it, indeed? */
413 if (!inode) 415 if (!inode)
414 return 0; 416 return 1;
415 if (name->len != len) 417 if (name->len != len)
416 return 1; 418 return 1;
417 if (memcmp(name->name, str, len)) 419 if (memcmp(name->name, str, len))
418 return 1; 420 return 1;
419 return !sysctl_is_seen(PROC_I(inode)->sysctl); 421 head = rcu_dereference(PROC_I(inode)->sysctl);
422 return !head || !sysctl_is_seen(head);
420} 423}
421 424
422static const struct dentry_operations proc_sys_dentry_operations = { 425static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e2..1bba24bad820 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1593 struct inode *inode = dentry->d_inode; 1593 struct inode *inode = dentry->d_inode;
1594 int maxlen = *lenp; 1594 int maxlen = *lenp;
1595 1595
1596 if (maxlen < 3) 1596 if (need_parent && (maxlen < 5)) {
1597 *lenp = 5;
1597 return 255; 1598 return 255;
1599 } else if (maxlen < 3) {
1600 *lenp = 3;
1601 return 255;
1602 }
1598 1603
1599 data[0] = inode->i_ino; 1604 data[0] = inode->i_ino;
1600 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..4b2eb564fdad 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, 771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
772 dentry, inode, &security); 772 dentry, inode, &security);
773 if (retval) { 773 if (retval) {
774 dir->i_nlink--; 774 DEC_DIR_INODE_NLINK(dir)
775 goto out_failed; 775 goto out_failed;
776 } 776 }
777 777
@@ -1122,10 +1122,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1122 reiserfs_write_unlock(dir->i_sb); 1122 reiserfs_write_unlock(dir->i_sb);
1123 return -EMLINK; 1123 return -EMLINK;
1124 } 1124 }
1125 if (inode->i_nlink == 0) {
1126 reiserfs_write_unlock(dir->i_sb);
1127 return -ENOENT;
1128 }
1129 1125
1130 /* inc before scheduling so reiserfs_unlink knows we are here */ 1126 /* inc before scheduling so reiserfs_unlink knows we are here */
1131 inc_nlink(inode); 1127 inc_nlink(inode);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e933644..5c11ca82b782 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
978 978
979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
980{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
983 return -EPERM; 981 return -EPERM;
984} 982}
985 983
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b703..961039121cb8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
75 int error = -EINVAL; 75 int error = -EINVAL;
76 int lookup_flags = 0; 76 int lookup_flags = 0;
77 77
78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
79 AT_EMPTY_PATH)) != 0)
79 goto out; 80 goto out;
80 81
81 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
82 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
83 if (flag & AT_NO_AUTOMOUNT) 84 if (flag & AT_NO_AUTOMOUNT)
84 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY;
85 88
86 error = user_path_at(dfd, filename, lookup_flags, &path); 89 error = user_path_at(dfd, filename, lookup_flags, &path);
87 if (error) 90 if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
297 if (bufsiz <= 0) 300 if (bufsiz <= 0)
298 return -EINVAL; 301 return -EINVAL;
299 302
300 error = user_path_at(dfd, pathname, 0, &path); 303 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
301 if (!error) { 304 if (!error) {
302 struct inode *inode = path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
303 306
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996b..8244924dec55 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
73} 73}
74EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
75 75
76static int do_statfs_native(struct path *path, struct statfs *buf) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct kstatfs st; 78 struct path path;
79 int retval; 79 int error = user_path(pathname, &path);
80 if (!error) {
81 error = vfs_statfs(&path, st);
82 path_put(&path);
83 }
84 return error;
85}
80 86
81 retval = vfs_statfs(path, &st); 87int fd_statfs(int fd, struct kstatfs *st)
82 if (retval) 88{
83 return retval; 89 struct file *file = fget(fd);
90 int error = -EBADF;
91 if (file) {
92 error = vfs_statfs(&file->f_path, st);
93 fput(file);
94 }
95 return error;
96}
84 97
85 if (sizeof(*buf) == sizeof(st)) 98static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
86 memcpy(buf, &st, sizeof(st)); 99{
100 struct statfs buf;
101
102 if (sizeof(buf) == sizeof(*st))
103 memcpy(&buf, st, sizeof(*st));
87 else { 104 else {
88 if (sizeof buf->f_blocks == 4) { 105 if (sizeof buf.f_blocks == 4) {
89 if ((st.f_blocks | st.f_bfree | st.f_bavail | 106 if ((st->f_blocks | st->f_bfree | st->f_bavail |
90 st.f_bsize | st.f_frsize) & 107 st->f_bsize | st->f_frsize) &
91 0xffffffff00000000ULL) 108 0xffffffff00000000ULL)
92 return -EOVERFLOW; 109 return -EOVERFLOW;
93 /* 110 /*
94 * f_files and f_ffree may be -1; it's okay to stuff 111 * f_files and f_ffree may be -1; it's okay to stuff
95 * that into 32 bits 112 * that into 32 bits
96 */ 113 */
97 if (st.f_files != -1 && 114 if (st->f_files != -1 &&
98 (st.f_files & 0xffffffff00000000ULL)) 115 (st->f_files & 0xffffffff00000000ULL))
99 return -EOVERFLOW; 116 return -EOVERFLOW;
100 if (st.f_ffree != -1 && 117 if (st->f_ffree != -1 &&
101 (st.f_ffree & 0xffffffff00000000ULL)) 118 (st->f_ffree & 0xffffffff00000000ULL))
102 return -EOVERFLOW; 119 return -EOVERFLOW;
103 } 120 }
104 121
105 buf->f_type = st.f_type; 122 buf.f_type = st->f_type;
106 buf->f_bsize = st.f_bsize; 123 buf.f_bsize = st->f_bsize;
107 buf->f_blocks = st.f_blocks; 124 buf.f_blocks = st->f_blocks;
108 buf->f_bfree = st.f_bfree; 125 buf.f_bfree = st->f_bfree;
109 buf->f_bavail = st.f_bavail; 126 buf.f_bavail = st->f_bavail;
110 buf->f_files = st.f_files; 127 buf.f_files = st->f_files;
111 buf->f_ffree = st.f_ffree; 128 buf.f_ffree = st->f_ffree;
112 buf->f_fsid = st.f_fsid; 129 buf.f_fsid = st->f_fsid;
113 buf->f_namelen = st.f_namelen; 130 buf.f_namelen = st->f_namelen;
114 buf->f_frsize = st.f_frsize; 131 buf.f_frsize = st->f_frsize;
115 buf->f_flags = st.f_flags; 132 buf.f_flags = st->f_flags;
116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 133 memset(buf.f_spare, 0, sizeof(buf.f_spare));
117 } 134 }
135 if (copy_to_user(p, &buf, sizeof(buf)))
136 return -EFAULT;
118 return 0; 137 return 0;
119} 138}
120 139
121static int do_statfs64(struct path *path, struct statfs64 *buf) 140static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
122{ 141{
123 struct kstatfs st; 142 struct statfs64 buf;
124 int retval; 143 if (sizeof(buf) == sizeof(*st))
125 144 memcpy(&buf, st, sizeof(*st));
126 retval = vfs_statfs(path, &st);
127 if (retval)
128 return retval;
129
130 if (sizeof(*buf) == sizeof(st))
131 memcpy(buf, &st, sizeof(st));
132 else { 145 else {
133 buf->f_type = st.f_type; 146 buf.f_type = st->f_type;
134 buf->f_bsize = st.f_bsize; 147 buf.f_bsize = st->f_bsize;
135 buf->f_blocks = st.f_blocks; 148 buf.f_blocks = st->f_blocks;
136 buf->f_bfree = st.f_bfree; 149 buf.f_bfree = st->f_bfree;
137 buf->f_bavail = st.f_bavail; 150 buf.f_bavail = st->f_bavail;
138 buf->f_files = st.f_files; 151 buf.f_files = st->f_files;
139 buf->f_ffree = st.f_ffree; 152 buf.f_ffree = st->f_ffree;
140 buf->f_fsid = st.f_fsid; 153 buf.f_fsid = st->f_fsid;
141 buf->f_namelen = st.f_namelen; 154 buf.f_namelen = st->f_namelen;
142 buf->f_frsize = st.f_frsize; 155 buf.f_frsize = st->f_frsize;
143 buf->f_flags = st.f_flags; 156 buf.f_flags = st->f_flags;
144 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 157 memset(buf.f_spare, 0, sizeof(buf.f_spare));
145 } 158 }
159 if (copy_to_user(p, &buf, sizeof(buf)))
160 return -EFAULT;
146 return 0; 161 return 0;
147} 162}
148 163
149SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 164SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
150{ 165{
151 struct path path; 166 struct kstatfs st;
152 int error; 167 int error = user_statfs(pathname, &st);
153 168 if (!error)
154 error = user_path(pathname, &path); 169 error = do_statfs_native(&st, buf);
155 if (!error) {
156 struct statfs tmp;
157 error = do_statfs_native(&path, &tmp);
158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
159 error = -EFAULT;
160 path_put(&path);
161 }
162 return error; 170 return error;
163} 171}
164 172
165SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 173SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
166{ 174{
167 struct path path; 175 struct kstatfs st;
168 long error; 176 int error;
169
170 if (sz != sizeof(*buf)) 177 if (sz != sizeof(*buf))
171 return -EINVAL; 178 return -EINVAL;
172 error = user_path(pathname, &path); 179 error = user_statfs(pathname, &st);
173 if (!error) { 180 if (!error)
174 struct statfs64 tmp; 181 error = do_statfs64(&st, buf);
175 error = do_statfs64(&path, &tmp);
176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
177 error = -EFAULT;
178 path_put(&path);
179 }
180 return error; 182 return error;
181} 183}
182 184
183SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 185SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
184{ 186{
185 struct file *file; 187 struct kstatfs st;
186 struct statfs tmp; 188 int error = fd_statfs(fd, &st);
187 int error; 189 if (!error)
188 190 error = do_statfs_native(&st, buf);
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = do_statfs_native(&file->f_path, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error; 191 return error;
199} 192}
200 193
201SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 194SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
202{ 195{
203 struct file *file; 196 struct kstatfs st;
204 struct statfs64 tmp;
205 int error; 197 int error;
206 198
207 if (sz != sizeof(*buf)) 199 if (sz != sizeof(*buf))
208 return -EINVAL; 200 return -EINVAL;
209 201
210 error = -EBADF; 202 error = fd_statfs(fd, &st);
211 file = fget(fd); 203 if (!error)
212 if (!file) 204 error = do_statfs64(&st, buf);
213 goto out;
214 error = do_statfs64(&file->f_path, &tmp);
215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
216 error = -EFAULT;
217 fput(file);
218out:
219 return error; 205 return error;
220} 206}
221 207
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
245 new_de = sysv_find_entry(new_dentry, &new_page); 245 new_de = sysv_find_entry(new_dentry, &new_page);
246 if (!new_de) 246 if (!new_de)
247 goto out_dir; 247 goto out_dir;
248 inode_inc_link_count(old_inode);
249 sysv_set_link(new_de, new_page, old_inode); 248 sysv_set_link(new_de, new_page, old_inode);
250 new_inode->i_ctime = CURRENT_TIME_SEC; 249 new_inode->i_ctime = CURRENT_TIME_SEC;
251 if (dir_de) 250 if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
257 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) 256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
258 goto out_dir; 257 goto out_dir;
259 } 258 }
260 inode_inc_link_count(old_inode);
261 err = sysv_add_link(new_dentry, old_inode); 259 err = sysv_add_link(new_dentry, old_inode);
262 if (err) { 260 if (err)
263 inode_dec_link_count(old_inode);
264 goto out_dir; 261 goto out_dir;
265 }
266 if (dir_de) 262 if (dir_de)
267 inode_inc_link_count(new_dir); 263 inode_inc_link_count(new_dir);
268 } 264 }
269 265
270 sysv_delete_entry(old_de, old_page); 266 sysv_delete_entry(old_de, old_page);
271 inode_dec_link_count(old_inode); 267 mark_inode_dirty(old_inode);
272 268
273 if (dir_de) { 269 if (dir_de) {
274 sysv_set_link(dir_de, dir_page, new_dir); 270 sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7f..7217d67a80a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..f1dce848ef96 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
35static inline int udf_match(int len1, const unsigned char *name1, int len2, 37static inline int udf_match(int len1, const unsigned char *name1, int len2,
36 const unsigned char *name2) 38 const unsigned char *name2)
37{ 39{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
650 struct udf_inode_info *iinfo; 652 struct udf_inode_info *iinfo;
651 653
652 err = -EMLINK; 654 err = -EMLINK;
653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 655 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out; 656 goto out;
655 657
656 err = -EIO; 658 err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1034 struct fileIdentDesc cfi, *fi; 1036 struct fileIdentDesc cfi, *fi;
1035 int err; 1037 int err;
1036 1038
1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1039 if (inode->i_nlink >= UDF_MAX_LINKS)
1038 return -EMLINK; 1040 return -EMLINK;
1039 }
1040 1041
1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1042 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1042 if (!fi) { 1043 if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1131 goto end_rename; 1132 goto end_rename;
1132 1133
1133 retval = -EMLINK; 1134 retval = -EMLINK;
1134 if (!new_inode && 1135 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1135 new_dir->i_nlink >=
1136 (256 << sizeof(new_dir->i_nlink)) - 1)
1137 goto end_rename; 1136 goto end_rename;
1138 } 1137 }
1139 if (!nfi) { 1138 if (!nfi) {
@@ -1287,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1287 struct fid *fid = (struct fid *)fh; 1286 struct fid *fid = (struct fid *)fh;
1288 int type = FILEID_UDF_WITHOUT_PARENT; 1287 int type = FILEID_UDF_WITHOUT_PARENT;
1289 1288
1290 if (len < 3 || (connectable && len < 5)) 1289 if (connectable && (len < 5)) {
1290 *lenp = 5;
1291 return 255; 1291 return 255;
1292 } else if (len < 3) {
1293 *lenp = 3;
1294 return 255;
1295 }
1292 1296
1293 *lenp = 3; 1297 *lenp = 3;
1294 fid->udf.block = location.logicalBlockNum; 1298 fid->udf.block = location.logicalBlockNum;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..d6f681535eb8 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); 306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
307 if (!new_de) 307 if (!new_de)
308 goto out_dir; 308 goto out_dir;
309 inode_inc_link_count(old_inode);
310 ufs_set_link(new_dir, new_de, new_page, old_inode); 309 ufs_set_link(new_dir, new_de, new_page, old_inode);
311 new_inode->i_ctime = CURRENT_TIME_SEC; 310 new_inode->i_ctime = CURRENT_TIME_SEC;
312 if (dir_de) 311 if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
318 if (new_dir->i_nlink >= UFS_LINK_MAX) 317 if (new_dir->i_nlink >= UFS_LINK_MAX)
319 goto out_dir; 318 goto out_dir;
320 } 319 }
321 inode_inc_link_count(old_inode);
322 err = ufs_add_link(new_dentry, old_inode); 320 err = ufs_add_link(new_dentry, old_inode);
323 if (err) { 321 if (err)
324 inode_dec_link_count(old_inode);
325 goto out_dir; 322 goto out_dir;
326 }
327 if (dir_de) 323 if (dir_de)
328 inode_inc_link_count(new_dir); 324 inode_inc_link_count(new_dir);
329 } 325 }
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
331 /* 327 /*
332 * Like most other Unix systems, set the ctime for inodes on a 328 * Like most other Unix systems, set the ctime for inodes on a
333 * rename. 329 * rename.
334 * inode_dec_link_count() will mark the inode dirty.
335 */ 330 */
336 old_inode->i_ctime = CURRENT_TIME_SEC; 331 old_inode->i_ctime = CURRENT_TIME_SEC;
337 332
338 ufs_delete_entry(old_dir, old_de, old_page); 333 ufs_delete_entry(old_dir, old_de, old_page);
339 inode_dec_link_count(old_inode); 334 mark_inode_dirty(old_inode);
340 335
341 if (dir_de) { 336 if (dir_de) {
342 ufs_set_link(old_inode, dir_de, dir_page, new_dir); 337 ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
152 152
153 if (!capable(CAP_SYS_ADMIN)) 153 if (!capable(CAP_SYS_ADMIN))
154 return -XFS_ERROR(EPERM); 154 return -XFS_ERROR(EPERM);
155 if (!blk_queue_discard(q))
156 return -XFS_ERROR(EOPNOTSUPP);
155 if (copy_from_user(&range, urange, sizeof(range))) 157 if (copy_from_user(&range, urange, sizeof(range)))
156 return -XFS_ERROR(EFAULT); 158 return -XFS_ERROR(EFAULT);
157 159
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fdd..f4f878fc0083 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
89 * seven combinations work. The real answer is "don't use v2". 89 * seven combinations work. The real answer is "don't use v2".
90 */ 90 */
91 len = xfs_fileid_length(fileid_type); 91 len = xfs_fileid_length(fileid_type);
92 if (*max_len < len) 92 if (*max_len < len) {
93 *max_len = len;
93 return 255; 94 return 255;
95 }
94 *max_len = len; 96 *max_len = len;
95 97
96 switch (fileid_type) { 98 switch (fileid_type) {
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8e..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
695 xfs_mount_t *mp, 695 xfs_mount_t *mp,
696 void __user *arg) 696 void __user *arg)
697{ 697{
698 xfs_fsop_geom_v1_t fsgeo; 698 xfs_fsop_geom_t fsgeo;
699 int error; 699 int error;
700 700
701 error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); 701 error = xfs_fs_geometry(mp, &fsgeo, 3);
702 if (error) 702 if (error)
703 return -error; 703 return -error;
704 704
705 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) 705 /*
706 * Caller should have passed an argument of type
707 * xfs_fsop_geom_v1_t. This is a proper subset of the
708 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
709 */
710 if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
706 return -XFS_ERROR(EFAULT); 711 return -XFS_ERROR(EFAULT);
707 return 0; 712 return 0;
708} 713}
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
53 xfs_fsop_geom_t *geo, 53 xfs_fsop_geom_t *geo,
54 int new_version) 54 int new_version)
55{ 55{
56
57 memset(geo, 0, sizeof(*geo));
58
56 geo->blocksize = mp->m_sb.sb_blocksize; 59 geo->blocksize = mp->m_sb.sb_blocksize;
57 geo->rtextsize = mp->m_sb.sb_rextsize; 60 geo->rtextsize = mp->m_sb.sb_rextsize;
58 geo->agblocks = mp->m_sb.sb_agblocks; 61 geo->agblocks = mp->m_sb.sb_agblocks;
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 2bcc5c7c22a6..61e03dd7939e 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -30,6 +30,9 @@ typedef u64 cputime64_t;
30#define cputime64_to_jiffies64(__ct) (__ct) 30#define cputime64_to_jiffies64(__ct) (__ct)
31#define jiffies64_to_cputime64(__jif) (__jif) 31#define jiffies64_to_cputime64(__jif) (__jif)
32#define cputime_to_cputime64(__ct) ((u64) __ct) 32#define cputime_to_cputime64(__ct) ((u64) __ct)
33#define cputime64_gt(__a, __b) ((__a) > (__b))
34
35#define nsecs_to_cputime64(__ct) nsecs_to_jiffies64(__ct)
33 36
34 37
35/* 38/*
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 0fc16e3f0bfc..84793c7025e2 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -80,6 +80,10 @@
80#define O_SYNC (__O_SYNC|O_DSYNC) 80#define O_SYNC (__O_SYNC|O_DSYNC)
81#endif 81#endif
82 82
83#ifndef O_PATH
84#define O_PATH 010000000
85#endif
86
83#ifndef O_NDELAY 87#ifndef O_NDELAY
84#define O_NDELAY O_NONBLOCK 88#define O_NDELAY O_NONBLOCK
85#endif 89#endif
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 3c2344f48136..01f227e14254 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -6,7 +6,7 @@
6#include <asm/errno.h> 6#include <asm/errno.h>
7 7
8static inline int 8static inline int
9futex_atomic_op_inuser (int encoded_op, int __user *uaddr) 9futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
10{ 10{
11 int op = (encoded_op >> 28) & 7; 11 int op = (encoded_op >> 28) & 7;
12 int cmp = (encoded_op >> 24) & 15; 12 int cmp = (encoded_op >> 24) & 15;
@@ -16,7 +16,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
16 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) 16 if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
17 oparg = 1 << oparg; 17 oparg = 1 << oparg;
18 18
19 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) 19 if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
20 return -EFAULT; 20 return -EFAULT;
21 21
22 pagefault_disable(); 22 pagefault_disable();
@@ -48,7 +48,8 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
48} 48}
49 49
50static inline int 50static inline int
51futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) 51futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
52 u32 oldval, u32 newval)
52{ 53{
53 return -ENOSYS; 54 return -ENOSYS;
54} 55}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 31b6188df221..b4bfe338ea0e 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -4,6 +4,8 @@
4#ifndef __ASSEMBLY__ 4#ifndef __ASSEMBLY__
5#ifdef CONFIG_MMU 5#ifdef CONFIG_MMU
6 6
7#include <linux/mm_types.h>
8
7#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS 9#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
8extern int ptep_set_access_flags(struct vm_area_struct *vma, 10extern int ptep_set_access_flags(struct vm_area_struct *vma,
9 unsigned long address, pte_t *ptep, 11 unsigned long address, pte_t *ptep,
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index b3bfabc258f3..c1a1216e29ce 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[];
11extern char _end[]; 11extern char _end[];
12extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; 12extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
13extern char __kprobes_text_start[], __kprobes_text_end[]; 13extern char __kprobes_text_start[], __kprobes_text_end[];
14extern char __entry_text_start[], __entry_text_end[];
14extern char __initdata_begin[], __initdata_end[]; 15extern char __initdata_begin[], __initdata_end[];
15extern char __start_rodata[], __end_rodata[]; 16extern char __start_rodata[], __end_rodata[];
16 17
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index b969770196c2..57af0338d270 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -646,9 +646,13 @@ __SYSCALL(__NR_prlimit64, sys_prlimit64)
646__SYSCALL(__NR_fanotify_init, sys_fanotify_init) 646__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
647#define __NR_fanotify_mark 263 647#define __NR_fanotify_mark 263
648__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) 648__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
649#define __NR_name_to_handle_at 264
650__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
651#define __NR_open_by_handle_at 265
652__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
649 653
650#undef __NR_syscalls 654#undef __NR_syscalls
651#define __NR_syscalls 264 655#define __NR_syscalls 266
652 656
653/* 657/*
654 * All syscalls below here should go away really, 658 * All syscalls below here should go away really,
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fe77e3395b40..906c3ceca9a2 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -424,6 +424,12 @@
424 *(.kprobes.text) \ 424 *(.kprobes.text) \
425 VMLINUX_SYMBOL(__kprobes_text_end) = .; 425 VMLINUX_SYMBOL(__kprobes_text_end) = .;
426 426
427#define ENTRY_TEXT \
428 ALIGN_FUNCTION(); \
429 VMLINUX_SYMBOL(__entry_text_start) = .; \
430 *(.entry.text) \
431 VMLINUX_SYMBOL(__entry_text_end) = .;
432
427#ifdef CONFIG_FUNCTION_GRAPH_TRACER 433#ifdef CONFIG_FUNCTION_GRAPH_TRACER
428#define IRQENTRY_TEXT \ 434#define IRQENTRY_TEXT \
429 ALIGN_FUNCTION(); \ 435 ALIGN_FUNCTION(); \
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index fe29aadb129d..348843b80150 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1101,7 +1101,7 @@ struct drm_device {
1101 struct platform_device *platformdev; /**< Platform device struture */ 1101 struct platform_device *platformdev; /**< Platform device struture */
1102 1102
1103 struct drm_sg_mem *sg; /**< Scatter gather memory */ 1103 struct drm_sg_mem *sg; /**< Scatter gather memory */
1104 int num_crtcs; /**< Number of CRTCs on this device */ 1104 unsigned int num_crtcs; /**< Number of CRTCs on this device */
1105 void *dev_private; /**< device private data */ 1105 void *dev_private; /**< device private data */
1106 void *mm_private; 1106 void *mm_private;
1107 struct address_space *dev_mapping; 1107 struct address_space *dev_mapping;
diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h
index 5cb86c307f5d..fc4875433817 100644
--- a/include/keys/rxrpc-type.h
+++ b/include/keys/rxrpc-type.h
@@ -99,7 +99,6 @@ struct rxrpc_key_token {
99 * structure of raw payloads passed to add_key() or instantiate key 99 * structure of raw payloads passed to add_key() or instantiate key
100 */ 100 */
101struct rxrpc_key_data_v1 { 101struct rxrpc_key_data_v1 {
102 u32 kif_version; /* 1 */
103 u16 security_index; 102 u16 security_index;
104 u16 ticket_length; 103 u16 ticket_length;
105 u32 expiry; /* time_t */ 104 u32 expiry; /* time_t */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4d18ff34670a..d5063e1b5555 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -699,7 +699,7 @@ extern void blk_start_queue(struct request_queue *q);
699extern void blk_stop_queue(struct request_queue *q); 699extern void blk_stop_queue(struct request_queue *q);
700extern void blk_sync_queue(struct request_queue *q); 700extern void blk_sync_queue(struct request_queue *q);
701extern void __blk_stop_queue(struct request_queue *q); 701extern void __blk_stop_queue(struct request_queue *q);
702extern void __blk_run_queue(struct request_queue *); 702extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
703extern void blk_run_queue(struct request_queue *); 703extern void blk_run_queue(struct request_queue *);
704extern int blk_rq_map_user(struct request_queue *, struct request *, 704extern int blk_rq_map_user(struct request_queue *, struct request *,
705 struct rq_map_data *, void __user *, unsigned long, 705 struct rq_map_data *, void __user *, unsigned long,
@@ -1088,7 +1088,6 @@ static inline void put_dev_sector(Sector p)
1088 1088
1089struct work_struct; 1089struct work_struct;
1090int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); 1090int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
1091int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
1092 1091
1093#ifdef CONFIG_BLK_CGROUP 1092#ifdef CONFIG_BLK_CGROUP
1094/* 1093/*
@@ -1136,7 +1135,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
1136extern int blk_throtl_init(struct request_queue *q); 1135extern int blk_throtl_init(struct request_queue *q);
1137extern void blk_throtl_exit(struct request_queue *q); 1136extern void blk_throtl_exit(struct request_queue *q);
1138extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); 1137extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
1139extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay);
1140extern void throtl_shutdown_timer_wq(struct request_queue *q); 1138extern void throtl_shutdown_timer_wq(struct request_queue *q);
1141#else /* CONFIG_BLK_DEV_THROTTLING */ 1139#else /* CONFIG_BLK_DEV_THROTTLING */
1142static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) 1140static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
@@ -1146,7 +1144,6 @@ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
1146 1144
1147static inline int blk_throtl_init(struct request_queue *q) { return 0; } 1145static inline int blk_throtl_init(struct request_queue *q) { return 0; }
1148static inline int blk_throtl_exit(struct request_queue *q) { return 0; } 1146static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
1149static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {}
1150static inline void throtl_shutdown_timer_wq(struct request_queue *q) {} 1147static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
1151#endif /* CONFIG_BLK_DEV_THROTTLING */ 1148#endif /* CONFIG_BLK_DEV_THROTTLING */
1152 1149
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 3395cf7130f5..b22fb0d3db0f 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -245,7 +245,6 @@ static inline int blk_cmd_buf_len(struct request *rq)
245 245
246extern void blk_dump_cmd(char *buf, struct request *rq); 246extern void blk_dump_cmd(char *buf, struct request *rq);
247extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); 247extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
248extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
249 248
250#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ 249#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
251 250
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c3011beac30d..31d91a64838b 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -123,6 +123,7 @@ struct ceph_msg_pos {
123#define SOCK_CLOSED 11 /* socket state changed to closed */ 123#define SOCK_CLOSED 11 /* socket state changed to closed */
124#define OPENING 13 /* open connection w/ (possibly new) peer */ 124#define OPENING 13 /* open connection w/ (possibly new) peer */
125#define DEAD 14 /* dead, about to kfree */ 125#define DEAD 14 /* dead, about to kfree */
126#define BACKOFF 15
126 127
127/* 128/*
128 * A single connection with another host. 129 * A single connection with another host.
@@ -160,7 +161,6 @@ struct ceph_connection {
160 struct list_head out_queue; 161 struct list_head out_queue;
161 struct list_head out_sent; /* sending or sent but unacked */ 162 struct list_head out_sent; /* sending or sent but unacked */
162 u64 out_seq; /* last message queued for send */ 163 u64 out_seq; /* last message queued for send */
163 bool out_keepalive_pending;
164 164
165 u64 in_seq, in_seq_acked; /* last message received, acked */ 165 u64 in_seq, in_seq_acked; /* last message received, acked */
166 166
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ce104e33cd22..e654fa239916 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -474,7 +474,8 @@ struct cgroup_subsys {
474 struct cgroup *old_cgrp, struct task_struct *tsk, 474 struct cgroup *old_cgrp, struct task_struct *tsk,
475 bool threadgroup); 475 bool threadgroup);
476 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); 476 void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
477 void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); 477 void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
478 struct cgroup *old_cgrp, struct task_struct *task);
478 int (*populate)(struct cgroup_subsys *ss, 479 int (*populate)(struct cgroup_subsys *ss,
479 struct cgroup *cgrp); 480 struct cgroup *cgrp);
480 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); 481 void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
@@ -626,6 +627,7 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg,
626/* Get id and depth of css */ 627/* Get id and depth of css */
627unsigned short css_id(struct cgroup_subsys_state *css); 628unsigned short css_id(struct cgroup_subsys_state *css);
628unsigned short css_depth(struct cgroup_subsys_state *css); 629unsigned short css_depth(struct cgroup_subsys_state *css);
630struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
629 631
630#else /* !CONFIG_CGROUPS */ 632#else /* !CONFIG_CGROUPS */
631 633
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index ccefff02b6cb..cdbfcb8780ec 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -65,4 +65,8 @@ SUBSYS(net_cls)
65SUBSYS(blkio) 65SUBSYS(blkio)
66#endif 66#endif
67 67
68#ifdef CONFIG_CGROUP_PERF
69SUBSYS(perf)
70#endif
71
68/* */ 72/* */
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 68cd248f6d3e..66900e3c6eb1 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -101,8 +101,8 @@ struct ieee_pfc {
101 */ 101 */
102struct dcb_app { 102struct dcb_app {
103 __u8 selector; 103 __u8 selector;
104 __u32 protocol;
105 __u8 priority; 104 __u8 priority;
105 __u16 protocol;
106}; 106};
107 107
108struct dcbmsg { 108struct dcbmsg {
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 597692f1fc8d..65970b811e22 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -34,7 +34,10 @@ struct debug_obj {
34 34
35/** 35/**
36 * struct debug_obj_descr - object type specific debug description structure 36 * struct debug_obj_descr - object type specific debug description structure
37 *
37 * @name: name of the object typee 38 * @name: name of the object typee
39 * @debug_hint: function returning address, which have associated
40 * kernel symbol, to allow identify the object
38 * @fixup_init: fixup function, which is called when the init check 41 * @fixup_init: fixup function, which is called when the init check
39 * fails 42 * fails
40 * @fixup_activate: fixup function, which is called when the activate check 43 * @fixup_activate: fixup function, which is called when the activate check
@@ -46,7 +49,7 @@ struct debug_obj {
46 */ 49 */
47struct debug_obj_descr { 50struct debug_obj_descr {
48 const char *name; 51 const char *name;
49 52 void *(*debug_hint) (void *addr);
50 int (*fixup_init) (void *addr, enum debug_obj_state state); 53 int (*fixup_init) (void *addr, enum debug_obj_state state);
51 int (*fixup_activate) (void *addr, enum debug_obj_state state); 54 int (*fixup_activate) (void *addr, enum debug_obj_state state);
52 int (*fixup_destroy) (void *addr, enum debug_obj_state state); 55 int (*fixup_destroy) (void *addr, enum debug_obj_state state);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 28028988c862..33a42f24b275 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -8,6 +8,9 @@ struct inode;
8struct super_block; 8struct super_block;
9struct vfsmount; 9struct vfsmount;
10 10
11/* limit the handle size to NFSv4 handle size now */
12#define MAX_HANDLE_SZ 128
13
11/* 14/*
12 * The fileid_type identifies how the file within the filesystem is encoded. 15 * The fileid_type identifies how the file within the filesystem is encoded.
13 * In theory this is freely set and parsed by the filesystem, but we try to 16 * In theory this is freely set and parsed by the filesystem, but we try to
@@ -121,8 +124,10 @@ struct fid {
121 * set, the encode_fh() should store sufficient information so that a good 124 * set, the encode_fh() should store sufficient information so that a good
122 * attempt can be made to find not only the file but also it's place in the 125 * attempt can be made to find not only the file but also it's place in the
123 * filesystem. This typically means storing a reference to de->d_parent in 126 * filesystem. This typically means storing a reference to de->d_parent in
124 * the filehandle fragment. encode_fh() should return the number of bytes 127 * the filehandle fragment. encode_fh() should return the fileid_type on
125 * stored or a negative error code such as %-ENOSPC 128 * success and on error returns 255 (if the space needed to encode fh is
129 * greater than @max_len*4 bytes). On error @max_len contains the minimum
130 * size(in 4 byte unit) needed to encode the file handle.
126 * 131 *
127 * fh_to_dentry: 132 * fh_to_dentry:
128 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle 133 * @fh_to_dentry is given a &struct super_block (@sb) and a file handle
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a562fa5fb4e3..f550f894ba15 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -46,6 +46,7 @@
46 unlinking file. */ 46 unlinking file. */
47#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ 47#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
48#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ 48#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
49#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
49 50
50#ifdef __KERNEL__ 51#ifdef __KERNEL__
51 52
diff --git a/include/linux/file.h b/include/linux/file.h
index e85baebf6279..21a79958541c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -29,6 +29,8 @@ static inline void fput_light(struct file *file, int fput_needed)
29 29
30extern struct file *fget(unsigned int fd); 30extern struct file *fget(unsigned int fd);
31extern struct file *fget_light(unsigned int fd, int *fput_needed); 31extern struct file *fget_light(unsigned int fd, int *fput_needed);
32extern struct file *fget_raw(unsigned int fd);
33extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
32extern void set_close_on_exec(unsigned int fd, int flag); 34extern void set_close_on_exec(unsigned int fd, int flag);
33extern void put_filp(struct file *); 35extern void put_filp(struct file *);
34extern int alloc_fd(unsigned start, unsigned flags); 36extern int alloc_fd(unsigned start, unsigned flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bd3215940c37..13df14e2c42e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -102,6 +102,9 @@ struct inodes_stat_t {
102/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ 102/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
103#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) 103#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
104 104
105/* File is opened with O_PATH; almost nothing can be done with it */
106#define FMODE_PATH ((__force fmode_t)0x4000)
107
105/* File was opened by fanotify and shouldn't generate fanotify events */ 108/* File was opened by fanotify and shouldn't generate fanotify events */
106#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 109#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
107 110
@@ -649,6 +652,7 @@ struct address_space {
649 spinlock_t private_lock; /* for use by the address_space */ 652 spinlock_t private_lock; /* for use by the address_space */
650 struct list_head private_list; /* ditto */ 653 struct list_head private_list; /* ditto */
651 struct address_space *assoc_mapping; /* ditto */ 654 struct address_space *assoc_mapping; /* ditto */
655 struct mutex unmap_mutex; /* to protect unmapping */
652} __attribute__((aligned(sizeof(long)))); 656} __attribute__((aligned(sizeof(long))));
653 /* 657 /*
654 * On most architectures that alignment is already the case; but 658 * On most architectures that alignment is already the case; but
@@ -977,6 +981,13 @@ struct file {
977#endif 981#endif
978}; 982};
979 983
984struct file_handle {
985 __u32 handle_bytes;
986 int handle_type;
987 /* file identifier */
988 unsigned char f_handle[0];
989};
990
980#define get_file(x) atomic_long_inc(&(x)->f_count) 991#define get_file(x) atomic_long_inc(&(x)->f_count)
981#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 992#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
982#define file_count(x) atomic_long_read(&(x)->f_count) 993#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -1400,6 +1411,7 @@ struct super_block {
1400 wait_queue_head_t s_wait_unfrozen; 1411 wait_queue_head_t s_wait_unfrozen;
1401 1412
1402 char s_id[32]; /* Informational name */ 1413 char s_id[32]; /* Informational name */
1414 u8 s_uuid[16]; /* UUID */
1403 1415
1404 void *s_fs_info; /* Filesystem private info */ 1416 void *s_fs_info; /* Filesystem private info */
1405 fmode_t s_mode; 1417 fmode_t s_mode;
@@ -1873,6 +1885,8 @@ extern void drop_collected_mounts(struct vfsmount *);
1873extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, 1885extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
1874 struct vfsmount *); 1886 struct vfsmount *);
1875extern int vfs_statfs(struct path *, struct kstatfs *); 1887extern int vfs_statfs(struct path *, struct kstatfs *);
1888extern int user_statfs(const char __user *, struct kstatfs *);
1889extern int fd_statfs(int, struct kstatfs *);
1876extern int statfs_by_dentry(struct dentry *, struct kstatfs *); 1890extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
1877extern int freeze_super(struct super_block *super); 1891extern int freeze_super(struct super_block *super);
1878extern int thaw_super(struct super_block *super); 1892extern int thaw_super(struct super_block *super);
@@ -1989,6 +2003,8 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset,
1989extern long do_sys_open(int dfd, const char __user *filename, int flags, 2003extern long do_sys_open(int dfd, const char __user *filename, int flags,
1990 int mode); 2004 int mode);
1991extern struct file *filp_open(const char *, int, int); 2005extern struct file *filp_open(const char *, int, int);
2006extern struct file *file_open_root(struct dentry *, struct vfsmount *,
2007 const char *, int);
1992extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, 2008extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
1993 const struct cred *); 2009 const struct cred *);
1994extern int filp_close(struct file *, fl_owner_t id); 2010extern int filp_close(struct file *, fl_owner_t id);
@@ -2139,7 +2155,7 @@ extern void check_disk_size_change(struct gendisk *disk,
2139 struct block_device *bdev); 2155 struct block_device *bdev);
2140extern int revalidate_disk(struct gendisk *); 2156extern int revalidate_disk(struct gendisk *);
2141extern int check_disk_change(struct block_device *); 2157extern int check_disk_change(struct block_device *);
2142extern int __invalidate_device(struct block_device *); 2158extern int __invalidate_device(struct block_device *, bool);
2143extern int invalidate_partition(struct gendisk *, int); 2159extern int invalidate_partition(struct gendisk *, int);
2144#endif 2160#endif
2145unsigned long invalidate_mapping_pages(struct address_space *mapping, 2161unsigned long invalidate_mapping_pages(struct address_space *mapping,
@@ -2204,10 +2220,6 @@ extern struct file *create_read_pipe(struct file *f, int flags);
2204extern struct file *create_write_pipe(int flags); 2220extern struct file *create_write_pipe(int flags);
2205extern void free_write_pipe(struct file *); 2221extern void free_write_pipe(struct file *);
2206 2222
2207extern struct file *do_filp_open(int dfd, const char *pathname,
2208 int open_flag, int mode, int acc_mode);
2209extern int may_open(struct path *, int, int);
2210
2211extern int kernel_read(struct file *, loff_t, char *, unsigned long); 2223extern int kernel_read(struct file *, loff_t, char *, unsigned long);
2212extern struct file * open_exec(const char *); 2224extern struct file * open_exec(const char *);
2213 2225
@@ -2225,6 +2237,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
2225 2237
2226extern int inode_init_always(struct super_block *, struct inode *); 2238extern int inode_init_always(struct super_block *, struct inode *);
2227extern void inode_init_once(struct inode *); 2239extern void inode_init_once(struct inode *);
2240extern void address_space_init_once(struct address_space *mapping);
2228extern void ihold(struct inode * inode); 2241extern void ihold(struct inode * inode);
2229extern void iput(struct inode *); 2242extern void iput(struct inode *);
2230extern struct inode * igrab(struct inode *); 2243extern struct inode * igrab(struct inode *);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index dcd6a7c3a435..ca29e03c1fac 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -428,6 +428,7 @@ extern void unregister_ftrace_graph(void);
428 428
429extern void ftrace_graph_init_task(struct task_struct *t); 429extern void ftrace_graph_init_task(struct task_struct *t);
430extern void ftrace_graph_exit_task(struct task_struct *t); 430extern void ftrace_graph_exit_task(struct task_struct *t);
431extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu);
431 432
432static inline int task_curr_ret_stack(struct task_struct *t) 433static inline int task_curr_ret_stack(struct task_struct *t)
433{ 434{
@@ -451,6 +452,7 @@ static inline void unpause_graph_tracing(void)
451 452
452static inline void ftrace_graph_init_task(struct task_struct *t) { } 453static inline void ftrace_graph_init_task(struct task_struct *t) { }
453static inline void ftrace_graph_exit_task(struct task_struct *t) { } 454static inline void ftrace_graph_exit_task(struct task_struct *t) { }
455static inline void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { }
454 456
455static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc, 457static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc,
456 trace_func_graph_ent_t entryfunc) 458 trace_func_graph_ent_t entryfunc)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 47e3997f7b5c..22b32af1b5ec 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -37,7 +37,6 @@ struct trace_entry {
37 unsigned char flags; 37 unsigned char flags;
38 unsigned char preempt_count; 38 unsigned char preempt_count;
39 int pid; 39 int pid;
40 int lock_depth;
41}; 40};
42 41
43#define FTRACE_MAX_EVENT \ 42#define FTRACE_MAX_EVENT \
@@ -208,7 +207,6 @@ struct ftrace_event_call {
208 207
209#define PERF_MAX_TRACE_SIZE 2048 208#define PERF_MAX_TRACE_SIZE 2048
210 209
211#define MAX_FILTER_PRED 32
212#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ 210#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
213 211
214extern void destroy_preds(struct ftrace_event_call *call); 212extern void destroy_preds(struct ftrace_event_call *call);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0b84c61607e8..dca31761b311 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -332,16 +332,19 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
332 return alloc_pages_current(gfp_mask, order); 332 return alloc_pages_current(gfp_mask, order);
333} 333}
334extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, 334extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
335 struct vm_area_struct *vma, unsigned long addr); 335 struct vm_area_struct *vma, unsigned long addr,
336 int node);
336#else 337#else
337#define alloc_pages(gfp_mask, order) \ 338#define alloc_pages(gfp_mask, order) \
338 alloc_pages_node(numa_node_id(), gfp_mask, order) 339 alloc_pages_node(numa_node_id(), gfp_mask, order)
339#define alloc_pages_vma(gfp_mask, order, vma, addr) \ 340#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \
340 alloc_pages(gfp_mask, order) 341 alloc_pages(gfp_mask, order)
341#endif 342#endif
342#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) 343#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
343#define alloc_page_vma(gfp_mask, vma, addr) \ 344#define alloc_page_vma(gfp_mask, vma, addr) \
344 alloc_pages_vma(gfp_mask, 0, vma, addr) 345 alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
346#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
347 alloc_pages_vma(gfp_mask, 0, vma, addr, node)
345 348
346extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); 349extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
347extern unsigned long get_zeroed_page(gfp_t gfp_mask); 350extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f376ddc64c4d..62f500c724f9 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -54,11 +54,13 @@ enum hrtimer_restart {
54 * 0x00 inactive 54 * 0x00 inactive
55 * 0x01 enqueued into rbtree 55 * 0x01 enqueued into rbtree
56 * 0x02 callback function running 56 * 0x02 callback function running
57 * 0x04 timer is migrated to another cpu
57 * 58 *
58 * Special cases: 59 * Special cases:
59 * 0x03 callback function running and enqueued 60 * 0x03 callback function running and enqueued
60 * (was requeued on another CPU) 61 * (was requeued on another CPU)
61 * 0x09 timer was migrated on CPU hotunplug 62 * 0x05 timer was migrated on CPU hotunplug
63 *
62 * The "callback function running and enqueued" status is only possible on 64 * The "callback function running and enqueued" status is only possible on
63 * SMP. It happens for example when a posix timer expired and the callback 65 * SMP. It happens for example when a posix timer expired and the callback
64 * queued a signal. Between dropping the lock which protects the posix timer 66 * queued a signal. Between dropping the lock which protects the posix timer
@@ -67,8 +69,11 @@ enum hrtimer_restart {
67 * as otherwise the timer could be removed before the softirq code finishes the 69 * as otherwise the timer could be removed before the softirq code finishes the
68 * the handling of the timer. 70 * the handling of the timer.
69 * 71 *
70 * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state to 72 * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
71 * preserve the HRTIMER_STATE_CALLBACK bit in the above scenario. 73 * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
74 * also affects HRTIMER_STATE_MIGRATE where the preservation is not
75 * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
76 * enqueued on the new cpu.
72 * 77 *
73 * All state transitions are protected by cpu_base->lock. 78 * All state transitions are protected by cpu_base->lock.
74 */ 79 */
@@ -148,7 +153,12 @@ struct hrtimer_clock_base {
148#endif 153#endif
149}; 154};
150 155
151#define HRTIMER_MAX_CLOCK_BASES 2 156enum hrtimer_base_type {
157 HRTIMER_BASE_REALTIME,
158 HRTIMER_BASE_MONOTONIC,
159 HRTIMER_BASE_BOOTTIME,
160 HRTIMER_MAX_CLOCK_BASES,
161};
152 162
153/* 163/*
154 * struct hrtimer_cpu_base - the per cpu clock bases 164 * struct hrtimer_cpu_base - the per cpu clock bases
@@ -308,6 +318,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
308 318
309extern ktime_t ktime_get(void); 319extern ktime_t ktime_get(void);
310extern ktime_t ktime_get_real(void); 320extern ktime_t ktime_get_real(void);
321extern ktime_t ktime_get_boottime(void);
311 322
312 323
313DECLARE_PER_CPU(struct tick_device, tick_cpu_device); 324DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
@@ -370,8 +381,9 @@ extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
370extern ktime_t hrtimer_get_next_event(void); 381extern ktime_t hrtimer_get_next_event(void);
371 382
372/* 383/*
373 * A timer is active, when it is enqueued into the rbtree or the callback 384 * A timer is active, when it is enqueued into the rbtree or the
374 * function is running. 385 * callback function is running or it's in the state of being migrated
386 * to another cpu.
375 */ 387 */
376static inline int hrtimer_active(const struct hrtimer *timer) 388static inline int hrtimer_active(const struct hrtimer *timer)
377{ 389{
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 55e0d4253e49..59b72ca1c5d1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,6 +14,8 @@
14#include <linux/smp.h> 14#include <linux/smp.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/hrtimer.h> 16#include <linux/hrtimer.h>
17#include <linux/kref.h>
18#include <linux/workqueue.h>
17 19
18#include <asm/atomic.h> 20#include <asm/atomic.h>
19#include <asm/ptrace.h> 21#include <asm/ptrace.h>
@@ -55,7 +57,8 @@
55 * Used by threaded interrupts which need to keep the 57 * Used by threaded interrupts which need to keep the
56 * irq line disabled until the threaded handler has been run. 58 * irq line disabled until the threaded handler has been run.
57 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend 59 * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
58 * 60 * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
61 * IRQF_NO_THREAD - Interrupt cannot be threaded
59 */ 62 */
60#define IRQF_DISABLED 0x00000020 63#define IRQF_DISABLED 0x00000020
61#define IRQF_SAMPLE_RANDOM 0x00000040 64#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -67,22 +70,10 @@
67#define IRQF_IRQPOLL 0x00001000 70#define IRQF_IRQPOLL 0x00001000
68#define IRQF_ONESHOT 0x00002000 71#define IRQF_ONESHOT 0x00002000
69#define IRQF_NO_SUSPEND 0x00004000 72#define IRQF_NO_SUSPEND 0x00004000
73#define IRQF_FORCE_RESUME 0x00008000
74#define IRQF_NO_THREAD 0x00010000
70 75
71#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND) 76#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
72
73/*
74 * Bits used by threaded handlers:
75 * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
76 * IRQTF_DIED - handler thread died
77 * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
78 * IRQTF_AFFINITY - irq thread is requested to adjust affinity
79 */
80enum {
81 IRQTF_RUNTHREAD,
82 IRQTF_DIED,
83 IRQTF_WARNED,
84 IRQTF_AFFINITY,
85};
86 77
87/* 78/*
88 * These values can be returned by request_any_context_irq() and 79 * These values can be returned by request_any_context_irq() and
@@ -110,6 +101,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
110 * @thread_fn: interupt handler function for threaded interrupts 101 * @thread_fn: interupt handler function for threaded interrupts
111 * @thread: thread pointer for threaded interrupts 102 * @thread: thread pointer for threaded interrupts
112 * @thread_flags: flags related to @thread 103 * @thread_flags: flags related to @thread
104 * @thread_mask: bitmask for keeping track of @thread activity
113 */ 105 */
114struct irqaction { 106struct irqaction {
115 irq_handler_t handler; 107 irq_handler_t handler;
@@ -120,6 +112,7 @@ struct irqaction {
120 irq_handler_t thread_fn; 112 irq_handler_t thread_fn;
121 struct task_struct *thread; 113 struct task_struct *thread;
122 unsigned long thread_flags; 114 unsigned long thread_flags;
115 unsigned long thread_mask;
123 const char *name; 116 const char *name;
124 struct proc_dir_entry *dir; 117 struct proc_dir_entry *dir;
125} ____cacheline_internodealigned_in_smp; 118} ____cacheline_internodealigned_in_smp;
@@ -240,6 +233,35 @@ extern int irq_can_set_affinity(unsigned int irq);
240extern int irq_select_affinity(unsigned int irq); 233extern int irq_select_affinity(unsigned int irq);
241 234
242extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); 235extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
236
237/**
238 * struct irq_affinity_notify - context for notification of IRQ affinity changes
239 * @irq: Interrupt to which notification applies
240 * @kref: Reference count, for internal use
241 * @work: Work item, for internal use
242 * @notify: Function to be called on change. This will be
243 * called in process context.
244 * @release: Function to be called on release. This will be
245 * called in process context. Once registered, the
246 * structure must only be freed when this function is
247 * called or later.
248 */
249struct irq_affinity_notify {
250 unsigned int irq;
251 struct kref kref;
252 struct work_struct work;
253 void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
254 void (*release)(struct kref *ref);
255};
256
257extern int
258irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
259
260static inline void irq_run_affinity_notifiers(void)
261{
262 flush_scheduled_work();
263}
264
243#else /* CONFIG_SMP */ 265#else /* CONFIG_SMP */
244 266
245static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) 267static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -255,7 +277,7 @@ static inline int irq_can_set_affinity(unsigned int irq)
255static inline int irq_select_affinity(unsigned int irq) { return 0; } 277static inline int irq_select_affinity(unsigned int irq) { return 0; }
256 278
257static inline int irq_set_affinity_hint(unsigned int irq, 279static inline int irq_set_affinity_hint(unsigned int irq,
258 const struct cpumask *m) 280 const struct cpumask *m)
259{ 281{
260 return -EINVAL; 282 return -EINVAL;
261} 283}
@@ -314,16 +336,24 @@ static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long
314} 336}
315 337
316/* IRQ wakeup (PM) control: */ 338/* IRQ wakeup (PM) control: */
317extern int set_irq_wake(unsigned int irq, unsigned int on); 339extern int irq_set_irq_wake(unsigned int irq, unsigned int on);
340
341#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
342/* Please do not use: Use the replacement functions instead */
343static inline int set_irq_wake(unsigned int irq, unsigned int on)
344{
345 return irq_set_irq_wake(irq, on);
346}
347#endif
318 348
319static inline int enable_irq_wake(unsigned int irq) 349static inline int enable_irq_wake(unsigned int irq)
320{ 350{
321 return set_irq_wake(irq, 1); 351 return irq_set_irq_wake(irq, 1);
322} 352}
323 353
324static inline int disable_irq_wake(unsigned int irq) 354static inline int disable_irq_wake(unsigned int irq)
325{ 355{
326 return set_irq_wake(irq, 0); 356 return irq_set_irq_wake(irq, 0);
327} 357}
328 358
329#else /* !CONFIG_GENERIC_HARDIRQS */ 359#else /* !CONFIG_GENERIC_HARDIRQS */
@@ -353,6 +383,13 @@ static inline int disable_irq_wake(unsigned int irq)
353} 383}
354#endif /* CONFIG_GENERIC_HARDIRQS */ 384#endif /* CONFIG_GENERIC_HARDIRQS */
355 385
386
387#ifdef CONFIG_IRQ_FORCED_THREADING
388extern bool force_irqthreads;
389#else
390#define force_irqthreads (0)
391#endif
392
356#ifndef __ARCH_SET_SOFTIRQ_PENDING 393#ifndef __ARCH_SET_SOFTIRQ_PENDING
357#define set_softirq_pending(x) (local_softirq_pending() = (x)) 394#define set_softirq_pending(x) (local_softirq_pending() = (x))
358#define or_softirq_pending(x) (local_softirq_pending() |= (x)) 395#define or_softirq_pending(x) (local_softirq_pending() |= (x))
@@ -426,6 +463,13 @@ extern void raise_softirq(unsigned int nr);
426 */ 463 */
427DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); 464DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
428 465
466DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
467
468static inline struct task_struct *this_cpu_ksoftirqd(void)
469{
470 return this_cpu_read(ksoftirqd);
471}
472
429/* Try to send a softirq to a remote cpu. If this cannot be done, the 473/* Try to send a softirq to a remote cpu. If this cannot be done, the
430 * work will be queued to the local cpu. 474 * work will be queued to the local cpu.
431 */ 475 */
@@ -645,6 +689,7 @@ static inline void init_irq_proc(void)
645 689
646struct seq_file; 690struct seq_file;
647int show_interrupts(struct seq_file *p, void *v); 691int show_interrupts(struct seq_file *p, void *v);
692int arch_show_interrupts(struct seq_file *p, int prec);
648 693
649extern int early_irq_init(void); 694extern int early_irq_init(void);
650extern int arch_probe_nr_irqs(void); 695extern int arch_probe_nr_irqs(void);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 80fcb53057bc..1d3577f30d45 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -29,61 +29,104 @@
29#include <asm/irq_regs.h> 29#include <asm/irq_regs.h>
30 30
31struct irq_desc; 31struct irq_desc;
32struct irq_data;
32typedef void (*irq_flow_handler_t)(unsigned int irq, 33typedef void (*irq_flow_handler_t)(unsigned int irq,
33 struct irq_desc *desc); 34 struct irq_desc *desc);
34 35typedef void (*irq_preflow_handler_t)(struct irq_data *data);
35 36
36/* 37/*
37 * IRQ line status. 38 * IRQ line status.
38 * 39 *
39 * Bits 0-7 are reserved for the IRQF_* bits in linux/interrupt.h 40 * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h
41 *
42 * IRQ_TYPE_NONE - default, unspecified type
43 * IRQ_TYPE_EDGE_RISING - rising edge triggered
44 * IRQ_TYPE_EDGE_FALLING - falling edge triggered
45 * IRQ_TYPE_EDGE_BOTH - rising and falling edge triggered
46 * IRQ_TYPE_LEVEL_HIGH - high level triggered
47 * IRQ_TYPE_LEVEL_LOW - low level triggered
48 * IRQ_TYPE_LEVEL_MASK - Mask to filter out the level bits
49 * IRQ_TYPE_SENSE_MASK - Mask for all the above bits
50 * IRQ_TYPE_PROBE - Special flag for probing in progress
51 *
52 * Bits which can be modified via irq_set/clear/modify_status_flags()
53 * IRQ_LEVEL - Interrupt is level type. Will be also
54 * updated in the code when the above trigger
55 * bits are modified via set_irq_type()
56 * IRQ_PER_CPU - Mark an interrupt PER_CPU. Will protect
57 * it from affinity setting
58 * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing
59 * IRQ_NOREQUEST - Interrupt cannot be requested via
60 * request_irq()
61 * IRQ_NOAUTOEN - Interrupt is not automatically enabled in
62 * request/setup_irq()
63 * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set)
64 * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context
65 * IRQ_NESTED_TRHEAD - Interrupt nests into another thread
66 *
67 * Deprecated bits. They are kept updated as long as
68 * CONFIG_GENERIC_HARDIRQS_NO_COMPAT is not set. Will go away soon. These bits
69 * are internal state of the core code and if you really need to acces
70 * them then talk to the genirq maintainer instead of hacking
71 * something weird.
40 * 72 *
41 * IRQ types
42 */ 73 */
43#define IRQ_TYPE_NONE 0x00000000 /* Default, unspecified type */ 74enum {
44#define IRQ_TYPE_EDGE_RISING 0x00000001 /* Edge rising type */ 75 IRQ_TYPE_NONE = 0x00000000,
45#define IRQ_TYPE_EDGE_FALLING 0x00000002 /* Edge falling type */ 76 IRQ_TYPE_EDGE_RISING = 0x00000001,
46#define IRQ_TYPE_EDGE_BOTH (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING) 77 IRQ_TYPE_EDGE_FALLING = 0x00000002,
47#define IRQ_TYPE_LEVEL_HIGH 0x00000004 /* Level high type */ 78 IRQ_TYPE_EDGE_BOTH = (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING),
48#define IRQ_TYPE_LEVEL_LOW 0x00000008 /* Level low type */ 79 IRQ_TYPE_LEVEL_HIGH = 0x00000004,
49#define IRQ_TYPE_SENSE_MASK 0x0000000f /* Mask of the above */ 80 IRQ_TYPE_LEVEL_LOW = 0x00000008,
50#define IRQ_TYPE_PROBE 0x00000010 /* Probing in progress */ 81 IRQ_TYPE_LEVEL_MASK = (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH),
51 82 IRQ_TYPE_SENSE_MASK = 0x0000000f,
52/* Internal flags */ 83
53#define IRQ_INPROGRESS 0x00000100 /* IRQ handler active - do not enter! */ 84 IRQ_TYPE_PROBE = 0x00000010,
54#define IRQ_DISABLED 0x00000200 /* IRQ disabled - do not enter! */ 85
55#define IRQ_PENDING 0x00000400 /* IRQ pending - replay on enable */ 86 IRQ_LEVEL = (1 << 8),
56#define IRQ_REPLAY 0x00000800 /* IRQ has been replayed but not acked yet */ 87 IRQ_PER_CPU = (1 << 9),
57#define IRQ_AUTODETECT 0x00001000 /* IRQ is being autodetected */ 88 IRQ_NOPROBE = (1 << 10),
58#define IRQ_WAITING 0x00002000 /* IRQ not yet seen - for autodetection */ 89 IRQ_NOREQUEST = (1 << 11),
59#define IRQ_LEVEL 0x00004000 /* IRQ level triggered */ 90 IRQ_NOAUTOEN = (1 << 12),
60#define IRQ_MASKED 0x00008000 /* IRQ masked - shouldn't be seen again */ 91 IRQ_NO_BALANCING = (1 << 13),
61#define IRQ_PER_CPU 0x00010000 /* IRQ is per CPU */ 92 IRQ_MOVE_PCNTXT = (1 << 14),
62#define IRQ_NOPROBE 0x00020000 /* IRQ is not valid for probing */ 93 IRQ_NESTED_THREAD = (1 << 15),
63#define IRQ_NOREQUEST 0x00040000 /* IRQ cannot be requested */ 94
64#define IRQ_NOAUTOEN 0x00080000 /* IRQ will not be enabled on request irq */ 95#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
65#define IRQ_WAKEUP 0x00100000 /* IRQ triggers system wakeup */ 96 IRQ_INPROGRESS = (1 << 16),
66#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ 97 IRQ_REPLAY = (1 << 17),
67#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ 98 IRQ_WAITING = (1 << 18),
68#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ 99 IRQ_DISABLED = (1 << 19),
69#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ 100 IRQ_PENDING = (1 << 20),
70#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ 101 IRQ_MASKED = (1 << 21),
71#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */ 102 IRQ_MOVE_PENDING = (1 << 22),
72#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ 103 IRQ_AFFINITY_SET = (1 << 23),
73#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ 104 IRQ_WAKEUP = (1 << 24),
105#endif
106};
74 107
75#define IRQF_MODIFY_MASK \ 108#define IRQF_MODIFY_MASK \
76 (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ 109 (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
77 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ 110 IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
78 IRQ_PER_CPU) 111 IRQ_PER_CPU | IRQ_NESTED_THREAD)
79 112
80#ifdef CONFIG_IRQ_PER_CPU 113#define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
81# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) 114
82# define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) 115static inline __deprecated bool CHECK_IRQ_PER_CPU(unsigned int status)
83#else 116{
84# define CHECK_IRQ_PER_CPU(var) 0 117 return status & IRQ_PER_CPU;
85# define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING 118}
86#endif 119
120/*
121 * Return value for chip->irq_set_affinity()
122 *
123 * IRQ_SET_MASK_OK - OK, core updates irq_data.affinity
124 * IRQ_SET_MASK_NOCPY - OK, chip did update irq_data.affinity
125 */
126enum {
127 IRQ_SET_MASK_OK = 0,
128 IRQ_SET_MASK_OK_NOCOPY,
129};
87 130
88struct msi_desc; 131struct msi_desc;
89 132
@@ -91,6 +134,8 @@ struct msi_desc;
91 * struct irq_data - per irq and irq chip data passed down to chip functions 134 * struct irq_data - per irq and irq chip data passed down to chip functions
92 * @irq: interrupt number 135 * @irq: interrupt number
93 * @node: node index useful for balancing 136 * @node: node index useful for balancing
137 * @state_use_accessor: status information for irq chip functions.
138 * Use accessor functions to deal with it
94 * @chip: low level interrupt hardware access 139 * @chip: low level interrupt hardware access
95 * @handler_data: per-IRQ data for the irq_chip methods 140 * @handler_data: per-IRQ data for the irq_chip methods
96 * @chip_data: platform-specific per-chip private data for the chip 141 * @chip_data: platform-specific per-chip private data for the chip
@@ -105,6 +150,7 @@ struct msi_desc;
105struct irq_data { 150struct irq_data {
106 unsigned int irq; 151 unsigned int irq;
107 unsigned int node; 152 unsigned int node;
153 unsigned int state_use_accessors;
108 struct irq_chip *chip; 154 struct irq_chip *chip;
109 void *handler_data; 155 void *handler_data;
110 void *chip_data; 156 void *chip_data;
@@ -114,6 +160,80 @@ struct irq_data {
114#endif 160#endif
115}; 161};
116 162
163/*
164 * Bit masks for irq_data.state
165 *
166 * IRQD_TRIGGER_MASK - Mask for the trigger type bits
167 * IRQD_SETAFFINITY_PENDING - Affinity setting is pending
168 * IRQD_NO_BALANCING - Balancing disabled for this IRQ
169 * IRQD_PER_CPU - Interrupt is per cpu
170 * IRQD_AFFINITY_SET - Interrupt affinity was set
171 * IRQD_LEVEL - Interrupt is level triggered
172 * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup
173 * from suspend
174 * IRDQ_MOVE_PCNTXT - Interrupt can be moved in process
175 * context
176 */
177enum {
178 IRQD_TRIGGER_MASK = 0xf,
179 IRQD_SETAFFINITY_PENDING = (1 << 8),
180 IRQD_NO_BALANCING = (1 << 10),
181 IRQD_PER_CPU = (1 << 11),
182 IRQD_AFFINITY_SET = (1 << 12),
183 IRQD_LEVEL = (1 << 13),
184 IRQD_WAKEUP_STATE = (1 << 14),
185 IRQD_MOVE_PCNTXT = (1 << 15),
186};
187
188static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
189{
190 return d->state_use_accessors & IRQD_SETAFFINITY_PENDING;
191}
192
193static inline bool irqd_is_per_cpu(struct irq_data *d)
194{
195 return d->state_use_accessors & IRQD_PER_CPU;
196}
197
198static inline bool irqd_can_balance(struct irq_data *d)
199{
200 return !(d->state_use_accessors & (IRQD_PER_CPU | IRQD_NO_BALANCING));
201}
202
203static inline bool irqd_affinity_was_set(struct irq_data *d)
204{
205 return d->state_use_accessors & IRQD_AFFINITY_SET;
206}
207
208static inline u32 irqd_get_trigger_type(struct irq_data *d)
209{
210 return d->state_use_accessors & IRQD_TRIGGER_MASK;
211}
212
213/*
214 * Must only be called inside irq_chip.irq_set_type() functions.
215 */
216static inline void irqd_set_trigger_type(struct irq_data *d, u32 type)
217{
218 d->state_use_accessors &= ~IRQD_TRIGGER_MASK;
219 d->state_use_accessors |= type & IRQD_TRIGGER_MASK;
220}
221
222static inline bool irqd_is_level_type(struct irq_data *d)
223{
224 return d->state_use_accessors & IRQD_LEVEL;
225}
226
227static inline bool irqd_is_wakeup_set(struct irq_data *d)
228{
229 return d->state_use_accessors & IRQD_WAKEUP_STATE;
230}
231
232static inline bool irqd_can_move_in_process_context(struct irq_data *d)
233{
234 return d->state_use_accessors & IRQD_MOVE_PCNTXT;
235}
236
117/** 237/**
118 * struct irq_chip - hardware interrupt chip descriptor 238 * struct irq_chip - hardware interrupt chip descriptor
119 * 239 *
@@ -150,6 +270,7 @@ struct irq_data {
150 * @irq_set_wake: enable/disable power-management wake-on of an IRQ 270 * @irq_set_wake: enable/disable power-management wake-on of an IRQ
151 * @irq_bus_lock: function to lock access to slow bus (i2c) chips 271 * @irq_bus_lock: function to lock access to slow bus (i2c) chips
152 * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips 272 * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
273 * @flags: chip specific flags
153 * 274 *
154 * @release: release function solely used by UML 275 * @release: release function solely used by UML
155 */ 276 */
@@ -196,12 +317,27 @@ struct irq_chip {
196 void (*irq_bus_lock)(struct irq_data *data); 317 void (*irq_bus_lock)(struct irq_data *data);
197 void (*irq_bus_sync_unlock)(struct irq_data *data); 318 void (*irq_bus_sync_unlock)(struct irq_data *data);
198 319
320 unsigned long flags;
321
199 /* Currently used only by UML, might disappear one day.*/ 322 /* Currently used only by UML, might disappear one day.*/
200#ifdef CONFIG_IRQ_RELEASE_METHOD 323#ifdef CONFIG_IRQ_RELEASE_METHOD
201 void (*release)(unsigned int irq, void *dev_id); 324 void (*release)(unsigned int irq, void *dev_id);
202#endif 325#endif
203}; 326};
204 327
328/*
329 * irq_chip specific flags
330 *
331 * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type()
332 * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled
333 * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path
334 */
335enum {
336 IRQCHIP_SET_TYPE_MASKED = (1 << 0),
337 IRQCHIP_EOI_IF_HANDLED = (1 << 1),
338 IRQCHIP_MASK_ON_SUSPEND = (1 << 2),
339};
340
205/* This include will go away once we isolated irq_desc usage to core code */ 341/* This include will go away once we isolated irq_desc usage to core code */
206#include <linux/irqdesc.h> 342#include <linux/irqdesc.h>
207 343
@@ -218,7 +354,7 @@ struct irq_chip {
218# define ARCH_IRQ_INIT_FLAGS 0 354# define ARCH_IRQ_INIT_FLAGS 0
219#endif 355#endif
220 356
221#define IRQ_DEFAULT_INIT_FLAGS (IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS) 357#define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS
222 358
223struct irqaction; 359struct irqaction;
224extern int setup_irq(unsigned int irq, struct irqaction *new); 360extern int setup_irq(unsigned int irq, struct irqaction *new);
@@ -229,9 +365,13 @@ extern void remove_irq(unsigned int irq, struct irqaction *act);
229#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) 365#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
230void move_native_irq(int irq); 366void move_native_irq(int irq);
231void move_masked_irq(int irq); 367void move_masked_irq(int irq);
368void irq_move_irq(struct irq_data *data);
369void irq_move_masked_irq(struct irq_data *data);
232#else 370#else
233static inline void move_native_irq(int irq) { } 371static inline void move_native_irq(int irq) { }
234static inline void move_masked_irq(int irq) { } 372static inline void move_masked_irq(int irq) { }
373static inline void irq_move_irq(struct irq_data *data) { }
374static inline void irq_move_masked_irq(struct irq_data *data) { }
235#endif 375#endif
236 376
237extern int no_irq_affinity; 377extern int no_irq_affinity;
@@ -267,23 +407,23 @@ extern struct irq_chip no_irq_chip;
267extern struct irq_chip dummy_irq_chip; 407extern struct irq_chip dummy_irq_chip;
268 408
269extern void 409extern void
270set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, 410irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
271 irq_flow_handler_t handle);
272extern void
273set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
274 irq_flow_handler_t handle, const char *name); 411 irq_flow_handler_t handle, const char *name);
275 412
413static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip,
414 irq_flow_handler_t handle)
415{
416 irq_set_chip_and_handler_name(irq, chip, handle, NULL);
417}
418
276extern void 419extern void
277__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, 420__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
278 const char *name); 421 const char *name);
279 422
280/*
281 * Set a highlevel flow handler for a given IRQ:
282 */
283static inline void 423static inline void
284set_irq_handler(unsigned int irq, irq_flow_handler_t handle) 424irq_set_handler(unsigned int irq, irq_flow_handler_t handle)
285{ 425{
286 __set_irq_handler(irq, handle, 0, NULL); 426 __irq_set_handler(irq, handle, 0, NULL);
287} 427}
288 428
289/* 429/*
@@ -292,14 +432,11 @@ set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
292 * IRQ_NOREQUEST and IRQ_NOPROBE) 432 * IRQ_NOREQUEST and IRQ_NOPROBE)
293 */ 433 */
294static inline void 434static inline void
295set_irq_chained_handler(unsigned int irq, 435irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle)
296 irq_flow_handler_t handle)
297{ 436{
298 __set_irq_handler(irq, handle, 1, NULL); 437 __irq_set_handler(irq, handle, 1, NULL);
299} 438}
300 439
301extern void set_irq_nested_thread(unsigned int irq, int nest);
302
303void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set); 440void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set);
304 441
305static inline void irq_set_status_flags(unsigned int irq, unsigned long set) 442static inline void irq_set_status_flags(unsigned int irq, unsigned long set)
@@ -312,16 +449,24 @@ static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr)
312 irq_modify_status(irq, clr, 0); 449 irq_modify_status(irq, clr, 0);
313} 450}
314 451
315static inline void set_irq_noprobe(unsigned int irq) 452static inline void irq_set_noprobe(unsigned int irq)
316{ 453{
317 irq_modify_status(irq, 0, IRQ_NOPROBE); 454 irq_modify_status(irq, 0, IRQ_NOPROBE);
318} 455}
319 456
320static inline void set_irq_probe(unsigned int irq) 457static inline void irq_set_probe(unsigned int irq)
321{ 458{
322 irq_modify_status(irq, IRQ_NOPROBE, 0); 459 irq_modify_status(irq, IRQ_NOPROBE, 0);
323} 460}
324 461
462static inline void irq_set_nested_thread(unsigned int irq, bool nest)
463{
464 if (nest)
465 irq_set_status_flags(irq, IRQ_NESTED_THREAD);
466 else
467 irq_clear_status_flags(irq, IRQ_NESTED_THREAD);
468}
469
325/* Handle dynamic irq creation and destruction */ 470/* Handle dynamic irq creation and destruction */
326extern unsigned int create_irq_nr(unsigned int irq_want, int node); 471extern unsigned int create_irq_nr(unsigned int irq_want, int node);
327extern int create_irq(void); 472extern int create_irq(void);
@@ -338,14 +483,14 @@ static inline void dynamic_irq_init(unsigned int irq)
338} 483}
339 484
340/* Set/get chip/data for an IRQ: */ 485/* Set/get chip/data for an IRQ: */
341extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); 486extern int irq_set_chip(unsigned int irq, struct irq_chip *chip);
342extern int set_irq_data(unsigned int irq, void *data); 487extern int irq_set_handler_data(unsigned int irq, void *data);
343extern int set_irq_chip_data(unsigned int irq, void *data); 488extern int irq_set_chip_data(unsigned int irq, void *data);
344extern int set_irq_type(unsigned int irq, unsigned int type); 489extern int irq_set_irq_type(unsigned int irq, unsigned int type);
345extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); 490extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
346extern struct irq_data *irq_get_irq_data(unsigned int irq); 491extern struct irq_data *irq_get_irq_data(unsigned int irq);
347 492
348static inline struct irq_chip *get_irq_chip(unsigned int irq) 493static inline struct irq_chip *irq_get_chip(unsigned int irq)
349{ 494{
350 struct irq_data *d = irq_get_irq_data(irq); 495 struct irq_data *d = irq_get_irq_data(irq);
351 return d ? d->chip : NULL; 496 return d ? d->chip : NULL;
@@ -356,7 +501,7 @@ static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d)
356 return d->chip; 501 return d->chip;
357} 502}
358 503
359static inline void *get_irq_chip_data(unsigned int irq) 504static inline void *irq_get_chip_data(unsigned int irq)
360{ 505{
361 struct irq_data *d = irq_get_irq_data(irq); 506 struct irq_data *d = irq_get_irq_data(irq);
362 return d ? d->chip_data : NULL; 507 return d ? d->chip_data : NULL;
@@ -367,18 +512,18 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d)
367 return d->chip_data; 512 return d->chip_data;
368} 513}
369 514
370static inline void *get_irq_data(unsigned int irq) 515static inline void *irq_get_handler_data(unsigned int irq)
371{ 516{
372 struct irq_data *d = irq_get_irq_data(irq); 517 struct irq_data *d = irq_get_irq_data(irq);
373 return d ? d->handler_data : NULL; 518 return d ? d->handler_data : NULL;
374} 519}
375 520
376static inline void *irq_data_get_irq_data(struct irq_data *d) 521static inline void *irq_data_get_irq_handler_data(struct irq_data *d)
377{ 522{
378 return d->handler_data; 523 return d->handler_data;
379} 524}
380 525
381static inline struct msi_desc *get_irq_msi(unsigned int irq) 526static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
382{ 527{
383 struct irq_data *d = irq_get_irq_data(irq); 528 struct irq_data *d = irq_get_irq_data(irq);
384 return d ? d->msi_desc : NULL; 529 return d ? d->msi_desc : NULL;
@@ -389,6 +534,89 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d)
389 return d->msi_desc; 534 return d->msi_desc;
390} 535}
391 536
537#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
538/* Please do not use: Use the replacement functions instead */
539static inline int set_irq_chip(unsigned int irq, struct irq_chip *chip)
540{
541 return irq_set_chip(irq, chip);
542}
543static inline int set_irq_data(unsigned int irq, void *data)
544{
545 return irq_set_handler_data(irq, data);
546}
547static inline int set_irq_chip_data(unsigned int irq, void *data)
548{
549 return irq_set_chip_data(irq, data);
550}
551static inline int set_irq_type(unsigned int irq, unsigned int type)
552{
553 return irq_set_irq_type(irq, type);
554}
555static inline int set_irq_msi(unsigned int irq, struct msi_desc *entry)
556{
557 return irq_set_msi_desc(irq, entry);
558}
559static inline struct irq_chip *get_irq_chip(unsigned int irq)
560{
561 return irq_get_chip(irq);
562}
563static inline void *get_irq_chip_data(unsigned int irq)
564{
565 return irq_get_chip_data(irq);
566}
567static inline void *get_irq_data(unsigned int irq)
568{
569 return irq_get_handler_data(irq);
570}
571static inline void *irq_data_get_irq_data(struct irq_data *d)
572{
573 return irq_data_get_irq_handler_data(d);
574}
575static inline struct msi_desc *get_irq_msi(unsigned int irq)
576{
577 return irq_get_msi_desc(irq);
578}
579static inline void set_irq_noprobe(unsigned int irq)
580{
581 irq_set_noprobe(irq);
582}
583static inline void set_irq_probe(unsigned int irq)
584{
585 irq_set_probe(irq);
586}
587static inline void set_irq_nested_thread(unsigned int irq, int nest)
588{
589 irq_set_nested_thread(irq, nest);
590}
591static inline void
592set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
593 irq_flow_handler_t handle, const char *name)
594{
595 irq_set_chip_and_handler_name(irq, chip, handle, name);
596}
597static inline void
598set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
599 irq_flow_handler_t handle)
600{
601 irq_set_chip_and_handler(irq, chip, handle);
602}
603static inline void
604__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
605 const char *name)
606{
607 __irq_set_handler(irq, handle, is_chained, name);
608}
609static inline void set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
610{
611 irq_set_handler(irq, handle);
612}
613static inline void
614set_irq_chained_handler(unsigned int irq, irq_flow_handler_t handle)
615{
616 irq_set_chained_handler(irq, handle);
617}
618#endif
619
392int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); 620int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
393void irq_free_descs(unsigned int irq, unsigned int cnt); 621void irq_free_descs(unsigned int irq, unsigned int cnt);
394int irq_reserve_irqs(unsigned int from, unsigned int cnt); 622int irq_reserve_irqs(unsigned int from, unsigned int cnt);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c1a95b7b58de..00218371518b 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -8,6 +8,7 @@
8 * For now it's included from <linux/irq.h> 8 * For now it's included from <linux/irq.h>
9 */ 9 */
10 10
11struct irq_affinity_notify;
11struct proc_dir_entry; 12struct proc_dir_entry;
12struct timer_rand_state; 13struct timer_rand_state;
13/** 14/**
@@ -18,13 +19,16 @@ struct timer_rand_state;
18 * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] 19 * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()]
19 * @action: the irq action chain 20 * @action: the irq action chain
20 * @status: status information 21 * @status: status information
22 * @core_internal_state__do_not_mess_with_it: core internal status information
21 * @depth: disable-depth, for nested irq_disable() calls 23 * @depth: disable-depth, for nested irq_disable() calls
22 * @wake_depth: enable depth, for multiple set_irq_wake() callers 24 * @wake_depth: enable depth, for multiple set_irq_wake() callers
23 * @irq_count: stats field to detect stalled irqs 25 * @irq_count: stats field to detect stalled irqs
24 * @last_unhandled: aging timer for unhandled count 26 * @last_unhandled: aging timer for unhandled count
25 * @irqs_unhandled: stats field for spurious unhandled interrupts 27 * @irqs_unhandled: stats field for spurious unhandled interrupts
26 * @lock: locking for SMP 28 * @lock: locking for SMP
29 * @affinity_notify: context for notification of affinity changes
27 * @pending_mask: pending rebalanced interrupts 30 * @pending_mask: pending rebalanced interrupts
31 * @threads_oneshot: bitfield to handle shared oneshot threads
28 * @threads_active: number of irqaction threads currently running 32 * @threads_active: number of irqaction threads currently running
29 * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers 33 * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
30 * @dir: /proc/irq/ procfs entry 34 * @dir: /proc/irq/ procfs entry
@@ -45,6 +49,7 @@ struct irq_desc {
45 struct { 49 struct {
46 unsigned int irq; 50 unsigned int irq;
47 unsigned int node; 51 unsigned int node;
52 unsigned int pad_do_not_even_think_about_it;
48 struct irq_chip *chip; 53 struct irq_chip *chip;
49 void *handler_data; 54 void *handler_data;
50 void *chip_data; 55 void *chip_data;
@@ -59,9 +64,16 @@ struct irq_desc {
59 struct timer_rand_state *timer_rand_state; 64 struct timer_rand_state *timer_rand_state;
60 unsigned int __percpu *kstat_irqs; 65 unsigned int __percpu *kstat_irqs;
61 irq_flow_handler_t handle_irq; 66 irq_flow_handler_t handle_irq;
67#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
68 irq_preflow_handler_t preflow_handler;
69#endif
62 struct irqaction *action; /* IRQ action list */ 70 struct irqaction *action; /* IRQ action list */
71#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
72 unsigned int status_use_accessors;
73#else
63 unsigned int status; /* IRQ status */ 74 unsigned int status; /* IRQ status */
64 75#endif
76 unsigned int core_internal_state__do_not_mess_with_it;
65 unsigned int depth; /* nested irq disables */ 77 unsigned int depth; /* nested irq disables */
66 unsigned int wake_depth; /* nested wake enables */ 78 unsigned int wake_depth; /* nested wake enables */
67 unsigned int irq_count; /* For detecting broken IRQs */ 79 unsigned int irq_count; /* For detecting broken IRQs */
@@ -70,10 +82,12 @@ struct irq_desc {
70 raw_spinlock_t lock; 82 raw_spinlock_t lock;
71#ifdef CONFIG_SMP 83#ifdef CONFIG_SMP
72 const struct cpumask *affinity_hint; 84 const struct cpumask *affinity_hint;
85 struct irq_affinity_notify *affinity_notify;
73#ifdef CONFIG_GENERIC_PENDING_IRQ 86#ifdef CONFIG_GENERIC_PENDING_IRQ
74 cpumask_var_t pending_mask; 87 cpumask_var_t pending_mask;
75#endif 88#endif
76#endif 89#endif
90 unsigned long threads_oneshot;
77 atomic_t threads_active; 91 atomic_t threads_active;
78 wait_queue_head_t wait_for_threads; 92 wait_queue_head_t wait_for_threads;
79#ifdef CONFIG_PROC_FS 93#ifdef CONFIG_PROC_FS
@@ -95,10 +109,51 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
95 109
96#ifdef CONFIG_GENERIC_HARDIRQS 110#ifdef CONFIG_GENERIC_HARDIRQS
97 111
98#define get_irq_desc_chip(desc) ((desc)->irq_data.chip) 112static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
99#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data) 113{
100#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) 114 return &desc->irq_data;
101#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) 115}
116
117static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc)
118{
119 return desc->irq_data.chip;
120}
121
122static inline void *irq_desc_get_chip_data(struct irq_desc *desc)
123{
124 return desc->irq_data.chip_data;
125}
126
127static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
128{
129 return desc->irq_data.handler_data;
130}
131
132static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
133{
134 return desc->irq_data.msi_desc;
135}
136
137#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
138static inline struct irq_chip *get_irq_desc_chip(struct irq_desc *desc)
139{
140 return irq_desc_get_chip(desc);
141}
142static inline void *get_irq_desc_data(struct irq_desc *desc)
143{
144 return irq_desc_get_handler_data(desc);
145}
146
147static inline void *get_irq_desc_chip_data(struct irq_desc *desc)
148{
149 return irq_desc_get_chip_data(desc);
150}
151
152static inline struct msi_desc *get_irq_desc_msi(struct irq_desc *desc)
153{
154 return irq_desc_get_msi_desc(desc);
155}
156#endif
102 157
103/* 158/*
104 * Architectures call this to let the generic IRQ layer 159 * Architectures call this to let the generic IRQ layer
@@ -123,6 +178,7 @@ static inline int irq_has_action(unsigned int irq)
123 return desc->action != NULL; 178 return desc->action != NULL;
124} 179}
125 180
181#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
126static inline int irq_balancing_disabled(unsigned int irq) 182static inline int irq_balancing_disabled(unsigned int irq)
127{ 183{
128 struct irq_desc *desc; 184 struct irq_desc *desc;
@@ -130,6 +186,7 @@ static inline int irq_balancing_disabled(unsigned int irq)
130 desc = irq_to_desc(irq); 186 desc = irq_to_desc(irq);
131 return desc->status & IRQ_NO_BALANCING_MASK; 187 return desc->status & IRQ_NO_BALANCING_MASK;
132} 188}
189#endif
133 190
134/* caller has locked the irq_desc and both params are valid */ 191/* caller has locked the irq_desc and both params are valid */
135static inline void __set_irq_handler_unlocked(int irq, 192static inline void __set_irq_handler_unlocked(int irq,
@@ -140,6 +197,17 @@ static inline void __set_irq_handler_unlocked(int irq,
140 desc = irq_to_desc(irq); 197 desc = irq_to_desc(irq);
141 desc->handle_irq = handler; 198 desc->handle_irq = handler;
142} 199}
200
201#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
202static inline void
203__irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler)
204{
205 struct irq_desc *desc;
206
207 desc = irq_to_desc(irq);
208 desc->preflow_handler = handler;
209}
210#endif
143#endif 211#endif
144 212
145#endif 213#endif
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 6811f4bfc6e7..922aa313c9f9 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
307extern unsigned long clock_t_to_jiffies(unsigned long x); 307extern unsigned long clock_t_to_jiffies(unsigned long x);
308extern u64 jiffies_64_to_clock_t(u64 x); 308extern u64 jiffies_64_to_clock_t(u64 x);
309extern u64 nsec_to_clock_t(u64 x); 309extern u64 nsec_to_clock_t(u64 x);
310extern u64 nsecs_to_jiffies64(u64 n);
310extern unsigned long nsecs_to_jiffies(u64 n); 311extern unsigned long nsecs_to_jiffies(u64 n);
311 312
312#define TIMESTAMP_SIZE 30 313#define TIMESTAMP_SIZE 30
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index ce0775aa64c3..7ff16f7d3ed4 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -64,7 +64,7 @@ struct kthread_work {
64}; 64};
65 65
66#define KTHREAD_WORKER_INIT(worker) { \ 66#define KTHREAD_WORKER_INIT(worker) { \
67 .lock = SPIN_LOCK_UNLOCKED, \ 67 .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
68 .work_list = LIST_HEAD_INIT((worker).work_list), \ 68 .work_list = LIST_HEAD_INIT((worker).work_list), \
69 } 69 }
70 70
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 3fd36845ca45..ef4f0b6083a3 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -71,6 +71,7 @@ struct wm8994 {
71 u16 irq_masks_cache[WM8994_NUM_IRQ_REGS]; 71 u16 irq_masks_cache[WM8994_NUM_IRQ_REGS];
72 72
73 /* Used over suspend/resume */ 73 /* Used over suspend/resume */
74 bool suspended;
74 u16 ldo_regs[WM8994_NUM_LDO_REGS]; 75 u16 ldo_regs[WM8994_NUM_LDO_REGS];
75 u16 gpio_regs[WM8994_NUM_GPIO_REGS]; 76 u16 gpio_regs[WM8994_NUM_GPIO_REGS];
76 77
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6385fc17ad4..679300c050f5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1309,8 +1309,6 @@ int add_from_early_node_map(struct range *range, int az,
1309 int nr_range, int nid); 1309 int nr_range, int nid);
1310u64 __init find_memory_core_early(int nid, u64 size, u64 align, 1310u64 __init find_memory_core_early(int nid, u64 size, u64 align,
1311 u64 goal, u64 limit); 1311 u64 goal, u64 limit);
1312void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
1313 u64 goal, u64 limit);
1314typedef int (*work_fn_t)(unsigned long, unsigned long, void *); 1312typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
1315extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); 1313extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
1316extern void sparse_memory_present_with_active_regions(int nid); 1314extern void sparse_memory_present_with_active_regions(int nid);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f276d4fa01fc..9c8603872c36 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,6 @@ struct nameidata {
19 struct path path; 19 struct path path;
20 struct qstr last; 20 struct qstr last;
21 struct path root; 21 struct path root;
22 struct file *file;
23 struct inode *inode; /* path.dentry.d_inode */ 22 struct inode *inode; /* path.dentry.d_inode */
24 unsigned int flags; 23 unsigned int flags;
25 unsigned seq; 24 unsigned seq;
@@ -63,6 +62,10 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
63#define LOOKUP_EXCL 0x0400 62#define LOOKUP_EXCL 0x0400
64#define LOOKUP_RENAME_TARGET 0x0800 63#define LOOKUP_RENAME_TARGET 0x0800
65 64
65#define LOOKUP_JUMPED 0x1000
66#define LOOKUP_ROOT 0x2000
67#define LOOKUP_EMPTY 0x4000
68
66extern int user_path_at(int, const char __user *, unsigned, struct path *); 69extern int user_path_at(int, const char __user *, unsigned, struct path *);
67 70
68#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path) 71#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
@@ -72,7 +75,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *);
72 75
73extern int kern_path(const char *, unsigned, struct path *); 76extern int kern_path(const char *, unsigned, struct path *);
74 77
75extern int path_lookup(const char *, unsigned, struct nameidata *); 78extern int kern_path_parent(const char *, struct nameidata *);
76extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 79extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
77 const char *, unsigned int, struct nameidata *); 80 const char *, unsigned int, struct nameidata *);
78 81
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d971346b0340..71caf7a5e6c6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2392,6 +2392,9 @@ extern int netdev_notice(const struct net_device *dev, const char *format, ...)
2392extern int netdev_info(const struct net_device *dev, const char *format, ...) 2392extern int netdev_info(const struct net_device *dev, const char *format, ...)
2393 __attribute__ ((format (printf, 2, 3))); 2393 __attribute__ ((format (printf, 2, 3)));
2394 2394
2395#define MODULE_ALIAS_NETDEV(device) \
2396 MODULE_ALIAS("netdev-" device)
2397
2395#if defined(DEBUG) 2398#if defined(DEBUG)
2396#define netdev_dbg(__dev, format, args...) \ 2399#define netdev_dbg(__dev, format, args...) \
2397 netdev_printk(KERN_DEBUG, __dev, format, ##args) 2400 netdev_printk(KERN_DEBUG, __dev, format, ##args)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b197563913bf..3e112de12d8d 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -68,11 +68,7 @@ struct nfs_client {
68 unsigned char cl_id_uniquifier; 68 unsigned char cl_id_uniquifier;
69 u32 cl_cb_ident; /* v4.0 callback identifier */ 69 u32 cl_cb_ident; /* v4.0 callback identifier */
70 const struct nfs4_minor_version_ops *cl_mvops; 70 const struct nfs4_minor_version_ops *cl_mvops;
71#endif /* CONFIG_NFS_V4 */
72 71
73#ifdef CONFIG_NFS_V4_1
74 /* clientid returned from EXCHANGE_ID, used by session operations */
75 u64 cl_ex_clid;
76 /* The sequence id to use for the next CREATE_SESSION */ 72 /* The sequence id to use for the next CREATE_SESSION */
77 u32 cl_seqid; 73 u32 cl_seqid;
78 /* The flags used for obtaining the clientid during EXCHANGE_ID */ 74 /* The flags used for obtaining the clientid during EXCHANGE_ID */
@@ -80,7 +76,7 @@ struct nfs_client {
80 struct nfs4_session *cl_session; /* sharred session */ 76 struct nfs4_session *cl_session; /* sharred session */
81 struct list_head cl_layouts; 77 struct list_head cl_layouts;
82 struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */ 78 struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
83#endif /* CONFIG_NFS_V4_1 */ 79#endif /* CONFIG_NFS_V4 */
84 80
85#ifdef CONFIG_NFS_FSCACHE 81#ifdef CONFIG_NFS_FSCACHE
86 struct fscache_cookie *fscache; /* client index cache cookie */ 82 struct fscache_cookie *fscache; /* client index cache cookie */
@@ -185,7 +181,7 @@ struct nfs_server {
185/* maximum number of slots to use */ 181/* maximum number of slots to use */
186#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE 182#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE
187 183
188#if defined(CONFIG_NFS_V4_1) 184#if defined(CONFIG_NFS_V4)
189 185
190/* Sessions */ 186/* Sessions */
191#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long))) 187#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long)))
@@ -225,5 +221,5 @@ struct nfs4_session {
225 struct nfs_client *clp; 221 struct nfs_client *clp;
226}; 222};
227 223
228#endif /* CONFIG_NFS_V4_1 */ 224#endif /* CONFIG_NFS_V4 */
229#endif 225#endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3adb06ebf841..580de67f318b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
518#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 518#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
519#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 519#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
520#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603 520#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
521#define PCI_DEVICE_ID_AMD_15H_NB_LINK 0x1604
521#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 522#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
522#define PCI_DEVICE_ID_AMD_LANCE 0x2000 523#define PCI_DEVICE_ID_AMD_LANCE 0x2000
523#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 524#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index dda5b0a3ff60..614615b8d42b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -225,8 +225,14 @@ struct perf_event_attr {
225 }; 225 };
226 226
227 __u32 bp_type; 227 __u32 bp_type;
228 __u64 bp_addr; 228 union {
229 __u64 bp_len; 229 __u64 bp_addr;
230 __u64 config1; /* extension of config */
231 };
232 union {
233 __u64 bp_len;
234 __u64 config2; /* extension of config1 */
235 };
230}; 236};
231 237
232/* 238/*
@@ -464,6 +470,7 @@ enum perf_callchain_context {
464 470
465#define PERF_FLAG_FD_NO_GROUP (1U << 0) 471#define PERF_FLAG_FD_NO_GROUP (1U << 0)
466#define PERF_FLAG_FD_OUTPUT (1U << 1) 472#define PERF_FLAG_FD_OUTPUT (1U << 1)
473#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
467 474
468#ifdef __KERNEL__ 475#ifdef __KERNEL__
469/* 476/*
@@ -471,6 +478,7 @@ enum perf_callchain_context {
471 */ 478 */
472 479
473#ifdef CONFIG_PERF_EVENTS 480#ifdef CONFIG_PERF_EVENTS
481# include <linux/cgroup.h>
474# include <asm/perf_event.h> 482# include <asm/perf_event.h>
475# include <asm/local64.h> 483# include <asm/local64.h>
476#endif 484#endif
@@ -539,6 +547,9 @@ struct hw_perf_event {
539 unsigned long event_base; 547 unsigned long event_base;
540 int idx; 548 int idx;
541 int last_cpu; 549 int last_cpu;
550 unsigned int extra_reg;
551 u64 extra_config;
552 int extra_alloc;
542 }; 553 };
543 struct { /* software */ 554 struct { /* software */
544 struct hrtimer hrtimer; 555 struct hrtimer hrtimer;
@@ -716,6 +727,22 @@ struct swevent_hlist {
716#define PERF_ATTACH_GROUP 0x02 727#define PERF_ATTACH_GROUP 0x02
717#define PERF_ATTACH_TASK 0x04 728#define PERF_ATTACH_TASK 0x04
718 729
730#ifdef CONFIG_CGROUP_PERF
731/*
732 * perf_cgroup_info keeps track of time_enabled for a cgroup.
733 * This is a per-cpu dynamically allocated data structure.
734 */
735struct perf_cgroup_info {
736 u64 time;
737 u64 timestamp;
738};
739
740struct perf_cgroup {
741 struct cgroup_subsys_state css;
742 struct perf_cgroup_info *info; /* timing info, one per cpu */
743};
744#endif
745
719/** 746/**
720 * struct perf_event - performance event kernel representation: 747 * struct perf_event - performance event kernel representation:
721 */ 748 */
@@ -832,6 +859,11 @@ struct perf_event {
832 struct event_filter *filter; 859 struct event_filter *filter;
833#endif 860#endif
834 861
862#ifdef CONFIG_CGROUP_PERF
863 struct perf_cgroup *cgrp; /* cgroup event is attach to */
864 int cgrp_defer_enabled;
865#endif
866
835#endif /* CONFIG_PERF_EVENTS */ 867#endif /* CONFIG_PERF_EVENTS */
836}; 868};
837 869
@@ -886,6 +918,7 @@ struct perf_event_context {
886 u64 generation; 918 u64 generation;
887 int pin_count; 919 int pin_count;
888 struct rcu_head rcu_head; 920 struct rcu_head rcu_head;
921 int nr_cgroups; /* cgroup events present */
889}; 922};
890 923
891/* 924/*
@@ -905,6 +938,9 @@ struct perf_cpu_context {
905 struct list_head rotation_list; 938 struct list_head rotation_list;
906 int jiffies_interval; 939 int jiffies_interval;
907 struct pmu *active_pmu; 940 struct pmu *active_pmu;
941#ifdef CONFIG_CGROUP_PERF
942 struct perf_cgroup *cgrp;
943#endif
908}; 944};
909 945
910struct perf_output_handle { 946struct perf_output_handle {
@@ -1040,11 +1076,11 @@ have_event:
1040 __perf_sw_event(event_id, nr, nmi, regs, addr); 1076 __perf_sw_event(event_id, nr, nmi, regs, addr);
1041} 1077}
1042 1078
1043extern atomic_t perf_task_events; 1079extern atomic_t perf_sched_events;
1044 1080
1045static inline void perf_event_task_sched_in(struct task_struct *task) 1081static inline void perf_event_task_sched_in(struct task_struct *task)
1046{ 1082{
1047 COND_STMT(&perf_task_events, __perf_event_task_sched_in(task)); 1083 COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task));
1048} 1084}
1049 1085
1050static inline 1086static inline
@@ -1052,7 +1088,7 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex
1052{ 1088{
1053 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); 1089 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1054 1090
1055 COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next)); 1091 COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next));
1056} 1092}
1057 1093
1058extern void perf_event_mmap(struct vm_area_struct *vma); 1094extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -1083,6 +1119,10 @@ extern int sysctl_perf_event_paranoid;
1083extern int sysctl_perf_event_mlock; 1119extern int sysctl_perf_event_mlock;
1084extern int sysctl_perf_event_sample_rate; 1120extern int sysctl_perf_event_sample_rate;
1085 1121
1122extern int perf_proc_update_handler(struct ctl_table *table, int write,
1123 void __user *buffer, size_t *lenp,
1124 loff_t *ppos);
1125
1086static inline bool perf_paranoid_tracepoint_raw(void) 1126static inline bool perf_paranoid_tracepoint_raw(void)
1087{ 1127{
1088 return sysctl_perf_event_paranoid > -1; 1128 return sysctl_perf_event_paranoid > -1;
diff --git a/include/linux/plist.h b/include/linux/plist.h
index 7254eda078e5..c9b9f322c8d8 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -31,15 +31,17 @@
31 * 31 *
32 * Simple ASCII art explanation: 32 * Simple ASCII art explanation:
33 * 33 *
34 * |HEAD | 34 * pl:prio_list (only for plist_node)
35 * | | 35 * nl:node_list
36 * |prio_list.prev|<------------------------------------| 36 * HEAD| NODE(S)
37 * |prio_list.next|<->|pl|<->|pl|<--------------->|pl|<-| 37 * |
38 * |10 | |10| |21| |21| |21| |40| (prio) 38 * ||------------------------------------|
39 * | | | | | | | | | | | | 39 * ||->|pl|<->|pl|<--------------->|pl|<-|
40 * | | | | | | | | | | | | 40 * | |10| |21| |21| |21| |40| (prio)
41 * |node_list.next|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-| 41 * | | | | | | | | | | |
42 * |node_list.prev|<------------------------------------| 42 * | | | | | | | | | | |
43 * |->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-|
44 * |-------------------------------------------|
43 * 45 *
44 * The nodes on the prio_list list are sorted by priority to simplify 46 * The nodes on the prio_list list are sorted by priority to simplify
45 * the insertion of new nodes. There are no nodes with duplicate 47 * the insertion of new nodes. There are no nodes with duplicate
@@ -78,7 +80,6 @@
78#include <linux/spinlock_types.h> 80#include <linux/spinlock_types.h>
79 81
80struct plist_head { 82struct plist_head {
81 struct list_head prio_list;
82 struct list_head node_list; 83 struct list_head node_list;
83#ifdef CONFIG_DEBUG_PI_LIST 84#ifdef CONFIG_DEBUG_PI_LIST
84 raw_spinlock_t *rawlock; 85 raw_spinlock_t *rawlock;
@@ -88,7 +89,8 @@ struct plist_head {
88 89
89struct plist_node { 90struct plist_node {
90 int prio; 91 int prio;
91 struct plist_head plist; 92 struct list_head prio_list;
93 struct list_head node_list;
92}; 94};
93 95
94#ifdef CONFIG_DEBUG_PI_LIST 96#ifdef CONFIG_DEBUG_PI_LIST
@@ -100,7 +102,6 @@ struct plist_node {
100#endif 102#endif
101 103
102#define _PLIST_HEAD_INIT(head) \ 104#define _PLIST_HEAD_INIT(head) \
103 .prio_list = LIST_HEAD_INIT((head).prio_list), \
104 .node_list = LIST_HEAD_INIT((head).node_list) 105 .node_list = LIST_HEAD_INIT((head).node_list)
105 106
106/** 107/**
@@ -133,7 +134,8 @@ struct plist_node {
133#define PLIST_NODE_INIT(node, __prio) \ 134#define PLIST_NODE_INIT(node, __prio) \
134{ \ 135{ \
135 .prio = (__prio), \ 136 .prio = (__prio), \
136 .plist = { _PLIST_HEAD_INIT((node).plist) }, \ 137 .prio_list = LIST_HEAD_INIT((node).prio_list), \
138 .node_list = LIST_HEAD_INIT((node).node_list), \
137} 139}
138 140
139/** 141/**
@@ -144,7 +146,6 @@ struct plist_node {
144static inline void 146static inline void
145plist_head_init(struct plist_head *head, spinlock_t *lock) 147plist_head_init(struct plist_head *head, spinlock_t *lock)
146{ 148{
147 INIT_LIST_HEAD(&head->prio_list);
148 INIT_LIST_HEAD(&head->node_list); 149 INIT_LIST_HEAD(&head->node_list);
149#ifdef CONFIG_DEBUG_PI_LIST 150#ifdef CONFIG_DEBUG_PI_LIST
150 head->spinlock = lock; 151 head->spinlock = lock;
@@ -160,7 +161,6 @@ plist_head_init(struct plist_head *head, spinlock_t *lock)
160static inline void 161static inline void
161plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock) 162plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
162{ 163{
163 INIT_LIST_HEAD(&head->prio_list);
164 INIT_LIST_HEAD(&head->node_list); 164 INIT_LIST_HEAD(&head->node_list);
165#ifdef CONFIG_DEBUG_PI_LIST 165#ifdef CONFIG_DEBUG_PI_LIST
166 head->rawlock = lock; 166 head->rawlock = lock;
@@ -176,7 +176,8 @@ plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
176static inline void plist_node_init(struct plist_node *node, int prio) 176static inline void plist_node_init(struct plist_node *node, int prio)
177{ 177{
178 node->prio = prio; 178 node->prio = prio;
179 plist_head_init(&node->plist, NULL); 179 INIT_LIST_HEAD(&node->prio_list);
180 INIT_LIST_HEAD(&node->node_list);
180} 181}
181 182
182extern void plist_add(struct plist_node *node, struct plist_head *head); 183extern void plist_add(struct plist_node *node, struct plist_head *head);
@@ -188,7 +189,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
188 * @head: the head for your list 189 * @head: the head for your list
189 */ 190 */
190#define plist_for_each(pos, head) \ 191#define plist_for_each(pos, head) \
191 list_for_each_entry(pos, &(head)->node_list, plist.node_list) 192 list_for_each_entry(pos, &(head)->node_list, node_list)
192 193
193/** 194/**
194 * plist_for_each_safe - iterate safely over a plist of given type 195 * plist_for_each_safe - iterate safely over a plist of given type
@@ -199,7 +200,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
199 * Iterate over a plist of given type, safe against removal of list entry. 200 * Iterate over a plist of given type, safe against removal of list entry.
200 */ 201 */
201#define plist_for_each_safe(pos, n, head) \ 202#define plist_for_each_safe(pos, n, head) \
202 list_for_each_entry_safe(pos, n, &(head)->node_list, plist.node_list) 203 list_for_each_entry_safe(pos, n, &(head)->node_list, node_list)
203 204
204/** 205/**
205 * plist_for_each_entry - iterate over list of given type 206 * plist_for_each_entry - iterate over list of given type
@@ -208,7 +209,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
208 * @mem: the name of the list_struct within the struct 209 * @mem: the name of the list_struct within the struct
209 */ 210 */
210#define plist_for_each_entry(pos, head, mem) \ 211#define plist_for_each_entry(pos, head, mem) \
211 list_for_each_entry(pos, &(head)->node_list, mem.plist.node_list) 212 list_for_each_entry(pos, &(head)->node_list, mem.node_list)
212 213
213/** 214/**
214 * plist_for_each_entry_safe - iterate safely over list of given type 215 * plist_for_each_entry_safe - iterate safely over list of given type
@@ -220,7 +221,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
220 * Iterate over list of given type, safe against removal of list entry. 221 * Iterate over list of given type, safe against removal of list entry.
221 */ 222 */
222#define plist_for_each_entry_safe(pos, n, head, m) \ 223#define plist_for_each_entry_safe(pos, n, head, m) \
223 list_for_each_entry_safe(pos, n, &(head)->node_list, m.plist.node_list) 224 list_for_each_entry_safe(pos, n, &(head)->node_list, m.node_list)
224 225
225/** 226/**
226 * plist_head_empty - return !0 if a plist_head is empty 227 * plist_head_empty - return !0 if a plist_head is empty
@@ -237,7 +238,7 @@ static inline int plist_head_empty(const struct plist_head *head)
237 */ 238 */
238static inline int plist_node_empty(const struct plist_node *node) 239static inline int plist_node_empty(const struct plist_node *node)
239{ 240{
240 return plist_head_empty(&node->plist); 241 return list_empty(&node->node_list);
241} 242}
242 243
243/* All functions below assume the plist_head is not empty. */ 244/* All functions below assume the plist_head is not empty. */
@@ -285,7 +286,7 @@ static inline int plist_node_empty(const struct plist_node *node)
285static inline struct plist_node *plist_first(const struct plist_head *head) 286static inline struct plist_node *plist_first(const struct plist_head *head)
286{ 287{
287 return list_entry(head->node_list.next, 288 return list_entry(head->node_list.next,
288 struct plist_node, plist.node_list); 289 struct plist_node, node_list);
289} 290}
290 291
291/** 292/**
@@ -297,7 +298,7 @@ static inline struct plist_node *plist_first(const struct plist_head *head)
297static inline struct plist_node *plist_last(const struct plist_head *head) 298static inline struct plist_node *plist_last(const struct plist_head *head)
298{ 299{
299 return list_entry(head->node_list.prev, 300 return list_entry(head->node_list.prev,
300 struct plist_node, plist.node_list); 301 struct plist_node, node_list);
301} 302}
302 303
303#endif 304#endif
diff --git a/include/linux/pm.h b/include/linux/pm.h
index dd9c7ab38270..21415cc91cbb 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -431,6 +431,8 @@ struct dev_pm_info {
431 struct list_head entry; 431 struct list_head entry;
432 struct completion completion; 432 struct completion completion;
433 struct wakeup_source *wakeup; 433 struct wakeup_source *wakeup;
434#else
435 unsigned int should_wakeup:1;
434#endif 436#endif
435#ifdef CONFIG_PM_RUNTIME 437#ifdef CONFIG_PM_RUNTIME
436 struct timer_list suspend_timer; 438 struct timer_list suspend_timer;
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 9cff00dd6b63..03a67db03d01 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -109,11 +109,6 @@ static inline bool device_can_wakeup(struct device *dev)
109 return dev->power.can_wakeup; 109 return dev->power.can_wakeup;
110} 110}
111 111
112static inline bool device_may_wakeup(struct device *dev)
113{
114 return false;
115}
116
117static inline struct wakeup_source *wakeup_source_create(const char *name) 112static inline struct wakeup_source *wakeup_source_create(const char *name)
118{ 113{
119 return NULL; 114 return NULL;
@@ -134,24 +129,32 @@ static inline void wakeup_source_unregister(struct wakeup_source *ws) {}
134 129
135static inline int device_wakeup_enable(struct device *dev) 130static inline int device_wakeup_enable(struct device *dev)
136{ 131{
137 return -EINVAL; 132 dev->power.should_wakeup = true;
133 return 0;
138} 134}
139 135
140static inline int device_wakeup_disable(struct device *dev) 136static inline int device_wakeup_disable(struct device *dev)
141{ 137{
138 dev->power.should_wakeup = false;
142 return 0; 139 return 0;
143} 140}
144 141
145static inline int device_init_wakeup(struct device *dev, bool val) 142static inline int device_set_wakeup_enable(struct device *dev, bool enable)
146{ 143{
147 dev->power.can_wakeup = val; 144 dev->power.should_wakeup = enable;
148 return val ? -EINVAL : 0; 145 return 0;
149} 146}
150 147
148static inline int device_init_wakeup(struct device *dev, bool val)
149{
150 device_set_wakeup_capable(dev, val);
151 device_set_wakeup_enable(dev, val);
152 return 0;
153}
151 154
152static inline int device_set_wakeup_enable(struct device *dev, bool enable) 155static inline bool device_may_wakeup(struct device *dev)
153{ 156{
154 return -EINVAL; 157 return dev->power.can_wakeup && dev->power.should_wakeup;
155} 158}
156 159
157static inline void __pm_stay_awake(struct wakeup_source *ws) {} 160static inline void __pm_stay_awake(struct wakeup_source *ws) {}
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
new file mode 100644
index 000000000000..369e19d3750b
--- /dev/null
+++ b/include/linux/posix-clock.h
@@ -0,0 +1,150 @@
1/*
2 * posix-clock.h - support for dynamic clock devices
3 *
4 * Copyright (C) 2010 OMICRON electronics GmbH
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#ifndef _LINUX_POSIX_CLOCK_H_
21#define _LINUX_POSIX_CLOCK_H_
22
23#include <linux/cdev.h>
24#include <linux/fs.h>
25#include <linux/poll.h>
26#include <linux/posix-timers.h>
27
28struct posix_clock;
29
30/**
31 * struct posix_clock_operations - functional interface to the clock
32 *
33 * Every posix clock is represented by a character device. Drivers may
34 * optionally offer extended capabilities by implementing the
35 * character device methods. The character device file operations are
36 * first handled by the clock device layer, then passed on to the
37 * driver by calling these functions.
38 *
39 * @owner: The clock driver should set to THIS_MODULE
40 * @clock_adjtime: Adjust the clock
41 * @clock_gettime: Read the current time
42 * @clock_getres: Get the clock resolution
43 * @clock_settime: Set the current time value
44 * @timer_create: Create a new timer
45 * @timer_delete: Remove a previously created timer
46 * @timer_gettime: Get remaining time and interval of a timer
47 * @timer_setttime: Set a timer's initial expiration and interval
48 * @fasync: Optional character device fasync method
49 * @mmap: Optional character device mmap method
50 * @open: Optional character device open method
51 * @release: Optional character device release method
52 * @ioctl: Optional character device ioctl method
53 * @read: Optional character device read method
54 * @poll: Optional character device poll method
55 */
56struct posix_clock_operations {
57 struct module *owner;
58
59 int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
60
61 int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts);
62
63 int (*clock_getres) (struct posix_clock *pc, struct timespec *ts);
64
65 int (*clock_settime)(struct posix_clock *pc,
66 const struct timespec *ts);
67
68 int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit);
69
70 int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit);
71
72 void (*timer_gettime)(struct posix_clock *pc,
73 struct k_itimer *kit, struct itimerspec *tsp);
74
75 int (*timer_settime)(struct posix_clock *pc,
76 struct k_itimer *kit, int flags,
77 struct itimerspec *tsp, struct itimerspec *old);
78 /*
79 * Optional character device methods:
80 */
81 int (*fasync) (struct posix_clock *pc,
82 int fd, struct file *file, int on);
83
84 long (*ioctl) (struct posix_clock *pc,
85 unsigned int cmd, unsigned long arg);
86
87 int (*mmap) (struct posix_clock *pc,
88 struct vm_area_struct *vma);
89
90 int (*open) (struct posix_clock *pc, fmode_t f_mode);
91
92 uint (*poll) (struct posix_clock *pc,
93 struct file *file, poll_table *wait);
94
95 int (*release) (struct posix_clock *pc);
96
97 ssize_t (*read) (struct posix_clock *pc,
98 uint flags, char __user *buf, size_t cnt);
99};
100
101/**
102 * struct posix_clock - represents a dynamic posix clock
103 *
104 * @ops: Functional interface to the clock
105 * @cdev: Character device instance for this clock
106 * @kref: Reference count.
107 * @mutex: Protects the 'zombie' field from concurrent access.
108 * @zombie: If 'zombie' is true, then the hardware has disappeared.
109 * @release: A function to free the structure when the reference count reaches
110 * zero. May be NULL if structure is statically allocated.
111 *
112 * Drivers should embed their struct posix_clock within a private
113 * structure, obtaining a reference to it during callbacks using
114 * container_of().
115 */
116struct posix_clock {
117 struct posix_clock_operations ops;
118 struct cdev cdev;
119 struct kref kref;
120 struct mutex mutex;
121 bool zombie;
122 void (*release)(struct posix_clock *clk);
123};
124
125/**
126 * posix_clock_register() - register a new clock
127 * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
128 * @devid: Allocated device id
129 *
130 * A clock driver calls this function to register itself with the
131 * clock device subsystem. If 'clk' points to dynamically allocated
132 * memory, then the caller must provide a 'release' function to free
133 * that memory.
134 *
135 * Returns zero on success, non-zero otherwise.
136 */
137int posix_clock_register(struct posix_clock *clk, dev_t devid);
138
139/**
140 * posix_clock_unregister() - unregister a clock
141 * @clk: Clock instance previously registered via posix_clock_register()
142 *
143 * A clock driver calls this function to remove itself from the clock
144 * device subsystem. The posix_clock itself will remain (in an
145 * inactive state) until its reference count drops to zero, at which
146 * point it will be deallocated with its 'release' method.
147 */
148void posix_clock_unregister(struct posix_clock *clk);
149
150#endif
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 3e23844a6990..d51243ae0726 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
4#include <linux/spinlock.h> 4#include <linux/spinlock.h>
5#include <linux/list.h> 5#include <linux/list.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/timex.h>
7 8
8union cpu_time_count { 9union cpu_time_count {
9 cputime_t cpu; 10 cputime_t cpu;
@@ -17,10 +18,21 @@ struct cpu_timer_list {
17 int firing; 18 int firing;
18}; 19};
19 20
21/*
22 * Bit fields within a clockid:
23 *
24 * The most significant 29 bits hold either a pid or a file descriptor.
25 *
26 * Bit 2 indicates whether a cpu clock refers to a thread or a process.
27 *
28 * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
29 *
30 * A clockid is invalid if bits 2, 1, and 0 are all set.
31 */
20#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3)) 32#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
21#define CPUCLOCK_PERTHREAD(clock) \ 33#define CPUCLOCK_PERTHREAD(clock) \
22 (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0) 34 (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
23#define CPUCLOCK_PID_MASK 7 35
24#define CPUCLOCK_PERTHREAD_MASK 4 36#define CPUCLOCK_PERTHREAD_MASK 4
25#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK) 37#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
26#define CPUCLOCK_CLOCK_MASK 3 38#define CPUCLOCK_CLOCK_MASK 3
@@ -28,12 +40,17 @@ struct cpu_timer_list {
28#define CPUCLOCK_VIRT 1 40#define CPUCLOCK_VIRT 1
29#define CPUCLOCK_SCHED 2 41#define CPUCLOCK_SCHED 2
30#define CPUCLOCK_MAX 3 42#define CPUCLOCK_MAX 3
43#define CLOCKFD CPUCLOCK_MAX
44#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
31 45
32#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ 46#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
33 ((~(clockid_t) (pid) << 3) | (clockid_t) (clock)) 47 ((~(clockid_t) (pid) << 3) | (clockid_t) (clock))
34#define MAKE_THREAD_CPUCLOCK(tid, clock) \ 48#define MAKE_THREAD_CPUCLOCK(tid, clock) \
35 MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK) 49 MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK)
36 50
51#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD)
52#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3))
53
37/* POSIX.1b interval timer structure. */ 54/* POSIX.1b interval timer structure. */
38struct k_itimer { 55struct k_itimer {
39 struct list_head list; /* free/ allocate list */ 56 struct list_head list; /* free/ allocate list */
@@ -67,10 +84,11 @@ struct k_itimer {
67}; 84};
68 85
69struct k_clock { 86struct k_clock {
70 int res; /* in nanoseconds */
71 int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); 87 int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
72 int (*clock_set) (const clockid_t which_clock, struct timespec * tp); 88 int (*clock_set) (const clockid_t which_clock,
89 const struct timespec *tp);
73 int (*clock_get) (const clockid_t which_clock, struct timespec * tp); 90 int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
91 int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
74 int (*timer_create) (struct k_itimer *timer); 92 int (*timer_create) (struct k_itimer *timer);
75 int (*nsleep) (const clockid_t which_clock, int flags, 93 int (*nsleep) (const clockid_t which_clock, int flags,
76 struct timespec *, struct timespec __user *); 94 struct timespec *, struct timespec __user *);
@@ -84,28 +102,14 @@ struct k_clock {
84 struct itimerspec * cur_setting); 102 struct itimerspec * cur_setting);
85}; 103};
86 104
87void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); 105extern struct k_clock clock_posix_cpu;
106extern struct k_clock clock_posix_dynamic;
88 107
89/* error handlers for timer_create, nanosleep and settime */ 108void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock);
90int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *,
91 struct timespec __user *);
92int do_posix_clock_nosettime(const clockid_t, struct timespec *tp);
93 109
94/* function to call to trigger timer event */ 110/* function to call to trigger timer event */
95int posix_timer_event(struct k_itimer *timr, int si_private); 111int posix_timer_event(struct k_itimer *timr, int si_private);
96 112
97int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts);
98int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts);
99int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts);
100int posix_cpu_timer_create(struct k_itimer *timer);
101int posix_cpu_nsleep(const clockid_t which_clock, int flags,
102 struct timespec *rqtp, struct timespec __user *rmtp);
103long posix_cpu_nsleep_restart(struct restart_block *restart_block);
104int posix_cpu_timer_set(struct k_itimer *timer, int flags,
105 struct itimerspec *new, struct itimerspec *old);
106int posix_cpu_timer_del(struct k_itimer *timer);
107void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp);
108
109void posix_cpu_timer_schedule(struct k_itimer *timer); 113void posix_cpu_timer_schedule(struct k_itimer *timer);
110 114
111void run_posix_cpu_timers(struct task_struct *task); 115void run_posix_cpu_timers(struct task_struct *task);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 092a04f874a8..a1147e5dd245 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -102,11 +102,8 @@
102 102
103extern long arch_ptrace(struct task_struct *child, long request, 103extern long arch_ptrace(struct task_struct *child, long request,
104 unsigned long addr, unsigned long data); 104 unsigned long addr, unsigned long data);
105extern int ptrace_traceme(void);
106extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); 105extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
107extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); 106extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
108extern int ptrace_attach(struct task_struct *tsk);
109extern int ptrace_detach(struct task_struct *, unsigned int);
110extern void ptrace_disable(struct task_struct *); 107extern void ptrace_disable(struct task_struct *);
111extern int ptrace_check_attach(struct task_struct *task, int kill); 108extern int ptrace_check_attach(struct task_struct *task, int kill);
112extern int ptrace_request(struct task_struct *child, long request, 109extern int ptrace_request(struct task_struct *child, long request,
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 8d3a2486544d..ab38ac80b0f9 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -100,6 +100,8 @@ void ring_buffer_free(struct ring_buffer *buffer);
100 100
101int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); 101int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
102 102
103void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val);
104
103struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, 105struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer,
104 unsigned long length); 106 unsigned long length);
105int ring_buffer_unlock_commit(struct ring_buffer *buffer, 107int ring_buffer_unlock_commit(struct ring_buffer *buffer,
diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h
index d63dcbaea169..9026b30238f3 100644
--- a/include/linux/rio_regs.h
+++ b/include/linux/rio_regs.h
@@ -14,10 +14,12 @@
14#define LINUX_RIO_REGS_H 14#define LINUX_RIO_REGS_H
15 15
16/* 16/*
17 * In RapidIO, each device has a 2MB configuration space that is 17 * In RapidIO, each device has a 16MB configuration space that is
18 * accessed via maintenance transactions. Portions of configuration 18 * accessed via maintenance transactions. Portions of configuration
19 * space are standardized and/or reserved. 19 * space are standardized and/or reserved.
20 */ 20 */
21#define RIO_MAINT_SPACE_SZ 0x1000000 /* 16MB of RapidIO mainenance space */
22
21#define RIO_DEV_ID_CAR 0x00 /* [I] Device Identity CAR */ 23#define RIO_DEV_ID_CAR 0x00 /* [I] Device Identity CAR */
22#define RIO_DEV_INFO_CAR 0x04 /* [I] Device Information CAR */ 24#define RIO_DEV_INFO_CAR 0x04 /* [I] Device Information CAR */
23#define RIO_ASM_ID_CAR 0x08 /* [I] Assembly Identity CAR */ 25#define RIO_ASM_ID_CAR 0x08 /* [I] Assembly Identity CAR */
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 89c3e5182991..2ca7e8a78060 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -133,7 +133,6 @@ extern struct class *rtc_class;
133 * The (current) exceptions are mostly filesystem hooks: 133 * The (current) exceptions are mostly filesystem hooks:
134 * - the proc() hook for procfs 134 * - the proc() hook for procfs
135 * - non-ioctl() chardev hooks: open(), release(), read_callback() 135 * - non-ioctl() chardev hooks: open(), release(), read_callback()
136 * - periodic irq calls: irq_set_state(), irq_set_freq()
137 * 136 *
138 * REVISIT those periodic irq calls *do* have ops_lock when they're 137 * REVISIT those periodic irq calls *do* have ops_lock when they're
139 * issued through ioctl() ... 138 * issued through ioctl() ...
@@ -148,11 +147,8 @@ struct rtc_class_ops {
148 int (*set_alarm)(struct device *, struct rtc_wkalrm *); 147 int (*set_alarm)(struct device *, struct rtc_wkalrm *);
149 int (*proc)(struct device *, struct seq_file *); 148 int (*proc)(struct device *, struct seq_file *);
150 int (*set_mmss)(struct device *, unsigned long secs); 149 int (*set_mmss)(struct device *, unsigned long secs);
151 int (*irq_set_state)(struct device *, int enabled);
152 int (*irq_set_freq)(struct device *, int freq);
153 int (*read_callback)(struct device *, int data); 150 int (*read_callback)(struct device *, int data);
154 int (*alarm_irq_enable)(struct device *, unsigned int enabled); 151 int (*alarm_irq_enable)(struct device *, unsigned int enabled);
155 int (*update_irq_enable)(struct device *, unsigned int enabled);
156}; 152};
157 153
158#define RTC_DEVICE_NAME_SIZE 20 154#define RTC_DEVICE_NAME_SIZE 20
@@ -227,6 +223,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
227extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); 223extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
228extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); 224extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
229extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); 225extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
226int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
230extern int rtc_read_alarm(struct rtc_device *rtc, 227extern int rtc_read_alarm(struct rtc_device *rtc,
231 struct rtc_wkalrm *alrm); 228 struct rtc_wkalrm *alrm);
232extern int rtc_set_alarm(struct rtc_device *rtc, 229extern int rtc_set_alarm(struct rtc_device *rtc,
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index bd31808c7d8e..cc0072e93e36 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -43,14 +43,6 @@ typedef struct {
43 RW_DEP_MAP_INIT(lockname) } 43 RW_DEP_MAP_INIT(lockname) }
44#endif 44#endif
45 45
46/*
47 * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence
48 * deprecated.
49 *
50 * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate.
51 */
52#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
53
54#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) 46#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
55 47
56#endif /* __LINUX_RWLOCK_TYPES_H */ 48#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index bdfcc2527970..34701241b673 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -12,15 +12,7 @@
12#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead" 12#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
13#endif 13#endif
14 14
15#include <linux/spinlock.h>
16#include <linux/list.h>
17
18#ifdef __KERNEL__ 15#ifdef __KERNEL__
19
20#include <linux/types.h>
21
22struct rwsem_waiter;
23
24/* 16/*
25 * the rw-semaphore definition 17 * the rw-semaphore definition
26 * - if activity is 0 then there are no active readers or writers 18 * - if activity is 0 then there are no active readers or writers
@@ -37,28 +29,7 @@ struct rw_semaphore {
37#endif 29#endif
38}; 30};
39 31
40#ifdef CONFIG_DEBUG_LOCK_ALLOC 32#define RWSEM_UNLOCKED_VALUE 0x00000000
41# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
42#else
43# define __RWSEM_DEP_MAP_INIT(lockname)
44#endif
45
46#define __RWSEM_INITIALIZER(name) \
47{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \
48 __RWSEM_DEP_MAP_INIT(name) }
49
50#define DECLARE_RWSEM(name) \
51 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
52
53extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
54 struct lock_class_key *key);
55
56#define init_rwsem(sem) \
57do { \
58 static struct lock_class_key __key; \
59 \
60 __init_rwsem((sem), #sem, &__key); \
61} while (0)
62 33
63extern void __down_read(struct rw_semaphore *sem); 34extern void __down_read(struct rw_semaphore *sem);
64extern int __down_read_trylock(struct rw_semaphore *sem); 35extern int __down_read_trylock(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index efd348fe8ca7..a8afe9cd000c 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -11,6 +11,9 @@
11 11
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16
14#include <asm/system.h> 17#include <asm/system.h>
15#include <asm/atomic.h> 18#include <asm/atomic.h>
16 19
@@ -19,9 +22,57 @@ struct rw_semaphore;
19#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK 22#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
20#include <linux/rwsem-spinlock.h> /* use a generic implementation */ 23#include <linux/rwsem-spinlock.h> /* use a generic implementation */
21#else 24#else
22#include <asm/rwsem.h> /* use an arch-specific implementation */ 25/* All arch specific implementations share the same struct */
26struct rw_semaphore {
27 long count;
28 spinlock_t wait_lock;
29 struct list_head wait_list;
30#ifdef CONFIG_DEBUG_LOCK_ALLOC
31 struct lockdep_map dep_map;
32#endif
33};
34
35extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
36extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
37extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
38extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
39
40/* Include the arch specific part */
41#include <asm/rwsem.h>
42
43/* In all implementations count != 0 means locked */
44static inline int rwsem_is_locked(struct rw_semaphore *sem)
45{
46 return sem->count != 0;
47}
48
49#endif
50
51/* Common initializer macros and functions */
52
53#ifdef CONFIG_DEBUG_LOCK_ALLOC
54# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
55#else
56# define __RWSEM_DEP_MAP_INIT(lockname)
23#endif 57#endif
24 58
59#define __RWSEM_INITIALIZER(name) \
60 { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \
61 LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
62
63#define DECLARE_RWSEM(name) \
64 struct rw_semaphore name = __RWSEM_INITIALIZER(name)
65
66extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
67 struct lock_class_key *key);
68
69#define init_rwsem(sem) \
70do { \
71 static struct lock_class_key __key; \
72 \
73 __init_rwsem((sem), #sem, &__key); \
74} while (0)
75
25/* 76/*
26 * lock for reading 77 * lock for reading
27 */ 78 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 777d8a5ed06b..c15936fe998b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1058,6 +1058,7 @@ struct sched_class {
1058 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); 1058 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
1059 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); 1059 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
1060 void (*yield_task) (struct rq *rq); 1060 void (*yield_task) (struct rq *rq);
1061 bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
1061 1062
1062 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); 1063 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
1063 1064
@@ -1084,12 +1085,10 @@ struct sched_class {
1084 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); 1085 void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
1085 void (*task_fork) (struct task_struct *p); 1086 void (*task_fork) (struct task_struct *p);
1086 1087
1087 void (*switched_from) (struct rq *this_rq, struct task_struct *task, 1088 void (*switched_from) (struct rq *this_rq, struct task_struct *task);
1088 int running); 1089 void (*switched_to) (struct rq *this_rq, struct task_struct *task);
1089 void (*switched_to) (struct rq *this_rq, struct task_struct *task,
1090 int running);
1091 void (*prio_changed) (struct rq *this_rq, struct task_struct *task, 1090 void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
1092 int oldprio, int running); 1091 int oldprio);
1093 1092
1094 unsigned int (*get_rr_interval) (struct rq *rq, 1093 unsigned int (*get_rr_interval) (struct rq *rq,
1095 struct task_struct *task); 1094 struct task_struct *task);
@@ -1715,7 +1714,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1715/* 1714/*
1716 * Per process flags 1715 * Per process flags
1717 */ 1716 */
1718#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
1719#define PF_STARTING 0x00000002 /* being created */ 1717#define PF_STARTING 0x00000002 /* being created */
1720#define PF_EXITING 0x00000004 /* getting shut down */ 1718#define PF_EXITING 0x00000004 /* getting shut down */
1721#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ 1719#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
@@ -1945,8 +1943,6 @@ int sched_rt_handler(struct ctl_table *table, int write,
1945 void __user *buffer, size_t *lenp, 1943 void __user *buffer, size_t *lenp,
1946 loff_t *ppos); 1944 loff_t *ppos);
1947 1945
1948extern unsigned int sysctl_sched_compat_yield;
1949
1950#ifdef CONFIG_SCHED_AUTOGROUP 1946#ifdef CONFIG_SCHED_AUTOGROUP
1951extern unsigned int sysctl_sched_autogroup_enabled; 1947extern unsigned int sysctl_sched_autogroup_enabled;
1952 1948
@@ -1977,6 +1973,7 @@ static inline int rt_mutex_getprio(struct task_struct *p)
1977# define rt_mutex_adjust_pi(p) do { } while (0) 1973# define rt_mutex_adjust_pi(p) do { } while (0)
1978#endif 1974#endif
1979 1975
1976extern bool yield_to(struct task_struct *p, bool preempt);
1980extern void set_user_nice(struct task_struct *p, long nice); 1977extern void set_user_nice(struct task_struct *p, long nice);
1981extern int task_prio(const struct task_struct *p); 1978extern int task_prio(const struct task_struct *p);
1982extern int task_nice(const struct task_struct *p); 1979extern int task_nice(const struct task_struct *p);
@@ -2049,7 +2046,7 @@ extern void release_uids(struct user_namespace *ns);
2049 2046
2050#include <asm/current.h> 2047#include <asm/current.h>
2051 2048
2052extern void do_timer(unsigned long ticks); 2049extern void xtime_update(unsigned long ticks);
2053 2050
2054extern int wake_up_state(struct task_struct *tsk, unsigned int state); 2051extern int wake_up_state(struct task_struct *tsk, unsigned int state);
2055extern int wake_up_process(struct task_struct *tsk); 2052extern int wake_up_process(struct task_struct *tsk);
@@ -2578,13 +2575,6 @@ static inline void inc_syscw(struct task_struct *tsk)
2578#define TASK_SIZE_OF(tsk) TASK_SIZE 2575#define TASK_SIZE_OF(tsk) TASK_SIZE
2579#endif 2576#endif
2580 2577
2581/*
2582 * Call the function if the target task is executing on a CPU right now:
2583 */
2584extern void task_oncpu_function_call(struct task_struct *p,
2585 void (*func) (void *info), void *info);
2586
2587
2588#ifdef CONFIG_MM_OWNER 2578#ifdef CONFIG_MM_OWNER
2589extern void mm_update_next_owner(struct mm_struct *mm); 2579extern void mm_update_next_owner(struct mm_struct *mm);
2590extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); 2580extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --git a/include/linux/security.h b/include/linux/security.h
index b2b7f9749f5e..debbd97db7ab 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,7 +53,7 @@ struct audit_krule;
53 */ 53 */
54extern int cap_capable(struct task_struct *tsk, const struct cred *cred, 54extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
55 int cap, int audit); 55 int cap, int audit);
56extern int cap_settime(struct timespec *ts, struct timezone *tz); 56extern int cap_settime(const struct timespec *ts, const struct timezone *tz);
57extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); 57extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
58extern int cap_ptrace_traceme(struct task_struct *parent); 58extern int cap_ptrace_traceme(struct task_struct *parent);
59extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); 59extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
@@ -1387,7 +1387,7 @@ struct security_operations {
1387 int (*quotactl) (int cmds, int type, int id, struct super_block *sb); 1387 int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
1388 int (*quota_on) (struct dentry *dentry); 1388 int (*quota_on) (struct dentry *dentry);
1389 int (*syslog) (int type); 1389 int (*syslog) (int type);
1390 int (*settime) (struct timespec *ts, struct timezone *tz); 1390 int (*settime) (const struct timespec *ts, const struct timezone *tz);
1391 int (*vm_enough_memory) (struct mm_struct *mm, long pages); 1391 int (*vm_enough_memory) (struct mm_struct *mm, long pages);
1392 1392
1393 int (*bprm_set_creds) (struct linux_binprm *bprm); 1393 int (*bprm_set_creds) (struct linux_binprm *bprm);
@@ -1669,7 +1669,7 @@ int security_sysctl(struct ctl_table *table, int op);
1669int security_quotactl(int cmds, int type, int id, struct super_block *sb); 1669int security_quotactl(int cmds, int type, int id, struct super_block *sb);
1670int security_quota_on(struct dentry *dentry); 1670int security_quota_on(struct dentry *dentry);
1671int security_syslog(int type); 1671int security_syslog(int type);
1672int security_settime(struct timespec *ts, struct timezone *tz); 1672int security_settime(const struct timespec *ts, const struct timezone *tz);
1673int security_vm_enough_memory(long pages); 1673int security_vm_enough_memory(long pages);
1674int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); 1674int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
1675int security_vm_enough_memory_kern(long pages); 1675int security_vm_enough_memory_kern(long pages);
@@ -1904,7 +1904,8 @@ static inline int security_syslog(int type)
1904 return 0; 1904 return 0;
1905} 1905}
1906 1906
1907static inline int security_settime(struct timespec *ts, struct timezone *tz) 1907static inline int security_settime(const struct timespec *ts,
1908 const struct timezone *tz)
1908{ 1909{
1909 return cap_settime(ts, tz); 1910 return cap_settime(ts, tz);
1910} 1911}
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 851b7783720d..73548eb13a5d 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -81,14 +81,6 @@ typedef struct spinlock {
81#define __SPIN_LOCK_UNLOCKED(lockname) \ 81#define __SPIN_LOCK_UNLOCKED(lockname) \
82 (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) 82 (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
83 83
84/*
85 * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
86 * deprecated.
87 * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as
88 * appropriate.
89 */
90#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
91
92#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) 84#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
93 85
94#include <linux/rwlock_types.h> 86#include <linux/rwlock_types.h>
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 88513fd8e208..d81db8012c63 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -212,6 +212,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
212struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, 212struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
213 const struct rpc_call_ops *ops); 213 const struct rpc_call_ops *ops);
214void rpc_put_task(struct rpc_task *); 214void rpc_put_task(struct rpc_task *);
215void rpc_put_task_async(struct rpc_task *);
215void rpc_exit_task(struct rpc_task *); 216void rpc_exit_task(struct rpc_task *);
216void rpc_exit(struct rpc_task *, int); 217void rpc_exit(struct rpc_task *, int);
217void rpc_release_calldata(const struct rpc_call_ops *, void *); 218void rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 98664db1be47..1f5c18e6f4f1 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -62,6 +62,7 @@ struct robust_list_head;
62struct getcpu_cache; 62struct getcpu_cache;
63struct old_linux_dirent; 63struct old_linux_dirent;
64struct perf_event_attr; 64struct perf_event_attr;
65struct file_handle;
65 66
66#include <linux/types.h> 67#include <linux/types.h>
67#include <linux/aio_abi.h> 68#include <linux/aio_abi.h>
@@ -132,11 +133,11 @@ extern struct trace_event_functions exit_syscall_print_funcs;
132 .class = &event_class_syscall_enter, \ 133 .class = &event_class_syscall_enter, \
133 .event.funcs = &enter_syscall_print_funcs, \ 134 .event.funcs = &enter_syscall_print_funcs, \
134 .data = (void *)&__syscall_meta_##sname,\ 135 .data = (void *)&__syscall_meta_##sname,\
136 .flags = TRACE_EVENT_FL_CAP_ANY, \
135 }; \ 137 }; \
136 static struct ftrace_event_call __used \ 138 static struct ftrace_event_call __used \
137 __attribute__((section("_ftrace_events"))) \ 139 __attribute__((section("_ftrace_events"))) \
138 *__event_enter_##sname = &event_enter_##sname; \ 140 *__event_enter_##sname = &event_enter_##sname;
139 __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
140 141
141#define SYSCALL_TRACE_EXIT_EVENT(sname) \ 142#define SYSCALL_TRACE_EXIT_EVENT(sname) \
142 static struct syscall_metadata __syscall_meta_##sname; \ 143 static struct syscall_metadata __syscall_meta_##sname; \
@@ -146,11 +147,11 @@ extern struct trace_event_functions exit_syscall_print_funcs;
146 .class = &event_class_syscall_exit, \ 147 .class = &event_class_syscall_exit, \
147 .event.funcs = &exit_syscall_print_funcs, \ 148 .event.funcs = &exit_syscall_print_funcs, \
148 .data = (void *)&__syscall_meta_##sname,\ 149 .data = (void *)&__syscall_meta_##sname,\
150 .flags = TRACE_EVENT_FL_CAP_ANY, \
149 }; \ 151 }; \
150 static struct ftrace_event_call __used \ 152 static struct ftrace_event_call __used \
151 __attribute__((section("_ftrace_events"))) \ 153 __attribute__((section("_ftrace_events"))) \
152 *__event_exit_##sname = &event_exit_##sname; \ 154 *__event_exit_##sname = &event_exit_##sname;
153 __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
154 155
155#define SYSCALL_METADATA(sname, nb) \ 156#define SYSCALL_METADATA(sname, nb) \
156 SYSCALL_TRACE_ENTER_EVENT(sname); \ 157 SYSCALL_TRACE_ENTER_EVENT(sname); \
@@ -158,6 +159,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
158 static struct syscall_metadata __used \ 159 static struct syscall_metadata __used \
159 __syscall_meta_##sname = { \ 160 __syscall_meta_##sname = { \
160 .name = "sys"#sname, \ 161 .name = "sys"#sname, \
162 .syscall_nr = -1, /* Filled in at boot */ \
161 .nb_args = nb, \ 163 .nb_args = nb, \
162 .types = types_##sname, \ 164 .types = types_##sname, \
163 .args = args_##sname, \ 165 .args = args_##sname, \
@@ -175,6 +177,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
175 static struct syscall_metadata __used \ 177 static struct syscall_metadata __used \
176 __syscall_meta__##sname = { \ 178 __syscall_meta__##sname = { \
177 .name = "sys_"#sname, \ 179 .name = "sys_"#sname, \
180 .syscall_nr = -1, /* Filled in at boot */ \
178 .nb_args = 0, \ 181 .nb_args = 0, \
179 .enter_event = &event_enter__##sname, \ 182 .enter_event = &event_enter__##sname, \
180 .exit_event = &event_exit__##sname, \ 183 .exit_event = &event_exit__##sname, \
@@ -313,6 +316,8 @@ asmlinkage long sys_clock_settime(clockid_t which_clock,
313 const struct timespec __user *tp); 316 const struct timespec __user *tp);
314asmlinkage long sys_clock_gettime(clockid_t which_clock, 317asmlinkage long sys_clock_gettime(clockid_t which_clock,
315 struct timespec __user *tp); 318 struct timespec __user *tp);
319asmlinkage long sys_clock_adjtime(clockid_t which_clock,
320 struct timex __user *tx);
316asmlinkage long sys_clock_getres(clockid_t which_clock, 321asmlinkage long sys_clock_getres(clockid_t which_clock,
317 struct timespec __user *tp); 322 struct timespec __user *tp);
318asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, 323asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
@@ -832,5 +837,10 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
832 unsigned long prot, unsigned long flags, 837 unsigned long prot, unsigned long flags,
833 unsigned long fd, unsigned long pgoff); 838 unsigned long fd, unsigned long pgoff);
834asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); 839asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
835 840asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
841 struct file_handle __user *handle,
842 int __user *mnt_id, int flag);
843asmlinkage long sys_open_by_handle_at(int mountdirfd,
844 struct file_handle __user *handle,
845 int flags);
836#endif 846#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7bb5cb64f3b8..11684d9e6bd2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -930,6 +930,7 @@ enum
930 930
931#ifdef __KERNEL__ 931#ifdef __KERNEL__
932#include <linux/list.h> 932#include <linux/list.h>
933#include <linux/rcupdate.h>
933 934
934/* For the /proc/sys support */ 935/* For the /proc/sys support */
935struct ctl_table; 936struct ctl_table;
@@ -1037,10 +1038,15 @@ struct ctl_table_root {
1037 struct ctl_table trees. */ 1038 struct ctl_table trees. */
1038struct ctl_table_header 1039struct ctl_table_header
1039{ 1040{
1040 struct ctl_table *ctl_table; 1041 union {
1041 struct list_head ctl_entry; 1042 struct {
1042 int used; 1043 struct ctl_table *ctl_table;
1043 int count; 1044 struct list_head ctl_entry;
1045 int used;
1046 int count;
1047 };
1048 struct rcu_head rcu;
1049 };
1044 struct completion *unregistering; 1050 struct completion *unregistering;
1045 struct ctl_table *ctl_table_arg; 1051 struct ctl_table *ctl_table_arg;
1046 struct ctl_table_root *root; 1052 struct ctl_table_root *root;
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 8651556dbd52..d3ec89fb4122 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -172,6 +172,14 @@ void thermal_zone_device_update(struct thermal_zone_device *);
172struct thermal_cooling_device *thermal_cooling_device_register(char *, void *, 172struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
173 const struct thermal_cooling_device_ops *); 173 const struct thermal_cooling_device_ops *);
174void thermal_cooling_device_unregister(struct thermal_cooling_device *); 174void thermal_cooling_device_unregister(struct thermal_cooling_device *);
175
176#ifdef CONFIG_NET
175extern int generate_netlink_event(u32 orig, enum events event); 177extern int generate_netlink_event(u32 orig, enum events event);
178#else
179static inline int generate_netlink_event(u32 orig, enum events event)
180{
181 return 0;
182}
183#endif
176 184
177#endif /* __THERMAL_H__ */ 185#endif /* __THERMAL_H__ */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c90696544176..20fc303947d3 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -18,9 +18,6 @@ struct compat_timespec;
18struct restart_block { 18struct restart_block {
19 long (*fn)(struct restart_block *); 19 long (*fn)(struct restart_block *);
20 union { 20 union {
21 struct {
22 unsigned long arg0, arg1, arg2, arg3;
23 };
24 /* For futex_wait and futex_wait_requeue_pi */ 21 /* For futex_wait and futex_wait_requeue_pi */
25 struct { 22 struct {
26 u32 __user *uaddr; 23 u32 __user *uaddr;
diff --git a/include/linux/time.h b/include/linux/time.h
index 1e6d3b59238d..454a26205787 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,8 +113,6 @@ static inline struct timespec timespec_sub(struct timespec lhs,
113#define timespec_valid(ts) \ 113#define timespec_valid(ts) \
114 (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC)) 114 (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
115 115
116extern seqlock_t xtime_lock;
117
118extern void read_persistent_clock(struct timespec *ts); 116extern void read_persistent_clock(struct timespec *ts);
119extern void read_boot_clock(struct timespec *ts); 117extern void read_boot_clock(struct timespec *ts);
120extern int update_persistent_clock(struct timespec now); 118extern int update_persistent_clock(struct timespec now);
@@ -125,8 +123,9 @@ extern int timekeeping_suspended;
125unsigned long get_seconds(void); 123unsigned long get_seconds(void);
126struct timespec current_kernel_time(void); 124struct timespec current_kernel_time(void);
127struct timespec __current_kernel_time(void); /* does not take xtime_lock */ 125struct timespec __current_kernel_time(void); /* does not take xtime_lock */
128struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */
129struct timespec get_monotonic_coarse(void); 126struct timespec get_monotonic_coarse(void);
127void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
128 struct timespec *wtom, struct timespec *sleep);
130 129
131#define CURRENT_TIME (current_kernel_time()) 130#define CURRENT_TIME (current_kernel_time())
132#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) 131#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 })
@@ -147,8 +146,9 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
147#endif 146#endif
148 147
149extern void do_gettimeofday(struct timeval *tv); 148extern void do_gettimeofday(struct timeval *tv);
150extern int do_settimeofday(struct timespec *tv); 149extern int do_settimeofday(const struct timespec *tv);
151extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); 150extern int do_sys_settimeofday(const struct timespec *tv,
151 const struct timezone *tz);
152#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) 152#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
153extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags); 153extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags);
154struct itimerval; 154struct itimerval;
@@ -162,12 +162,13 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw,
162 struct timespec *ts_real); 162 struct timespec *ts_real);
163extern void getboottime(struct timespec *ts); 163extern void getboottime(struct timespec *ts);
164extern void monotonic_to_bootbased(struct timespec *ts); 164extern void monotonic_to_bootbased(struct timespec *ts);
165extern void get_monotonic_boottime(struct timespec *ts);
165 166
166extern struct timespec timespec_trunc(struct timespec t, unsigned gran); 167extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
167extern int timekeeping_valid_for_hres(void); 168extern int timekeeping_valid_for_hres(void);
168extern u64 timekeeping_max_deferment(void); 169extern u64 timekeeping_max_deferment(void);
169extern void update_wall_time(void);
170extern void timekeeping_leap_insert(int leapsecond); 170extern void timekeeping_leap_insert(int leapsecond);
171extern int timekeeping_inject_offset(struct timespec *ts);
171 172
172struct tms; 173struct tms;
173extern void do_sys_times(struct tms *); 174extern void do_sys_times(struct tms *);
@@ -292,6 +293,7 @@ struct itimerval {
292#define CLOCK_MONOTONIC_RAW 4 293#define CLOCK_MONOTONIC_RAW 4
293#define CLOCK_REALTIME_COARSE 5 294#define CLOCK_REALTIME_COARSE 5
294#define CLOCK_MONOTONIC_COARSE 6 295#define CLOCK_MONOTONIC_COARSE 6
296#define CLOCK_BOOTTIME 7
295 297
296/* 298/*
297 * The IDs of various hardware clocks: 299 * The IDs of various hardware clocks:
diff --git a/include/linux/timex.h b/include/linux/timex.h
index d23999f9499d..aa60fe7b6ed6 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -73,7 +73,7 @@ struct timex {
73 long tolerance; /* clock frequency tolerance (ppm) 73 long tolerance; /* clock frequency tolerance (ppm)
74 * (read only) 74 * (read only)
75 */ 75 */
76 struct timeval time; /* (read only) */ 76 struct timeval time; /* (read only, except for ADJ_SETOFFSET) */
77 long tick; /* (modified) usecs between clock ticks */ 77 long tick; /* (modified) usecs between clock ticks */
78 78
79 long ppsfreq; /* pps frequency (scaled ppm) (ro) */ 79 long ppsfreq; /* pps frequency (scaled ppm) (ro) */
@@ -102,6 +102,7 @@ struct timex {
102#define ADJ_STATUS 0x0010 /* clock status */ 102#define ADJ_STATUS 0x0010 /* clock status */
103#define ADJ_TIMECONST 0x0020 /* pll time constant */ 103#define ADJ_TIMECONST 0x0020 /* pll time constant */
104#define ADJ_TAI 0x0080 /* set TAI offset */ 104#define ADJ_TAI 0x0080 /* set TAI offset */
105#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */
105#define ADJ_MICRO 0x1000 /* select microsecond resolution */ 106#define ADJ_MICRO 0x1000 /* select microsecond resolution */
106#define ADJ_NANO 0x2000 /* select nanosecond resolution */ 107#define ADJ_NANO 0x2000 /* select nanosecond resolution */
107#define ADJ_TICK 0x4000 /* tick value */ 108#define ADJ_TICK 0x4000 /* tick value */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4a3cd2cd2f5e..96e50e0ce3ca 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -89,6 +89,18 @@
89#define IPV6_ADDR_SCOPE_GLOBAL 0x0e 89#define IPV6_ADDR_SCOPE_GLOBAL 0x0e
90 90
91/* 91/*
92 * Addr flags
93 */
94#ifdef __KERNEL__
95#define IPV6_ADDR_MC_FLAG_TRANSIENT(a) \
96 ((a)->s6_addr[1] & 0x10)
97#define IPV6_ADDR_MC_FLAG_PREFIX(a) \
98 ((a)->s6_addr[1] & 0x20)
99#define IPV6_ADDR_MC_FLAG_RENDEZVOUS(a) \
100 ((a)->s6_addr[1] & 0x40)
101#endif
102
103/*
92 * fragmentation header 104 * fragmentation header
93 */ 105 */
94 106
diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h
index cd85b3bc8327..e505358d8999 100644
--- a/include/net/netfilter/nf_tproxy_core.h
+++ b/include/net/netfilter/nf_tproxy_core.h
@@ -201,18 +201,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
201} 201}
202#endif 202#endif
203 203
204static inline void
205nf_tproxy_put_sock(struct sock *sk)
206{
207 /* TIME_WAIT inet sockets have to be handled differently */
208 if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT))
209 inet_twsk_put(inet_twsk(sk));
210 else
211 sock_put(sk);
212}
213
214/* assign a socket to the skb -- consumes sk */ 204/* assign a socket to the skb -- consumes sk */
215int 205void
216nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); 206nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk);
217 207
218#endif 208#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 160a407c1963..04f8556313d5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -199,7 +199,7 @@ struct tcf_proto {
199 199
200struct qdisc_skb_cb { 200struct qdisc_skb_cb {
201 unsigned int pkt_len; 201 unsigned int pkt_len;
202 char data[]; 202 long data[];
203}; 203};
204 204
205static inline int qdisc_qlen(struct Qdisc *q) 205static inline int qdisc_qlen(struct Qdisc *q)
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 8479b66c067b..3fd5064dd43a 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -261,6 +261,7 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev);
261#define CONF_ENABLE_ESR 0x0008 261#define CONF_ENABLE_ESR 0x0008
262#define CONF_ENABLE_IOCARD 0x0010 /* auto-enabled if IO resources or IRQ 262#define CONF_ENABLE_IOCARD 0x0010 /* auto-enabled if IO resources or IRQ
263 * (CONF_ENABLE_IRQ) in use */ 263 * (CONF_ENABLE_IRQ) in use */
264#define CONF_ENABLE_ZVCARD 0x0020
264 265
265/* flags used by pcmcia_loop_config() autoconfiguration */ 266/* flags used by pcmcia_loop_config() autoconfiguration */
266#define CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */ 267#define CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */
diff --git a/include/sound/wm8903.h b/include/sound/wm8903.h
index b4a0db2307ef..1eeebd534f7e 100644
--- a/include/sound/wm8903.h
+++ b/include/sound/wm8903.h
@@ -17,13 +17,9 @@
17/* 17/*
18 * R6 (0x06) - Mic Bias Control 0 18 * R6 (0x06) - Mic Bias Control 0
19 */ 19 */
20#define WM8903_MICDET_HYST_ENA 0x0080 /* MICDET_HYST_ENA */ 20#define WM8903_MICDET_THR_MASK 0x0030 /* MICDET_THR - [5:4] */
21#define WM8903_MICDET_HYST_ENA_MASK 0x0080 /* MICDET_HYST_ENA */ 21#define WM8903_MICDET_THR_SHIFT 4 /* MICDET_THR - [5:4] */
22#define WM8903_MICDET_HYST_ENA_SHIFT 7 /* MICDET_HYST_ENA */ 22#define WM8903_MICDET_THR_WIDTH 2 /* MICDET_THR - [5:4] */
23#define WM8903_MICDET_HYST_ENA_WIDTH 1 /* MICDET_HYST_ENA */
24#define WM8903_MICDET_THR_MASK 0x0070 /* MICDET_THR - [6:4] */
25#define WM8903_MICDET_THR_SHIFT 4 /* MICDET_THR - [6:4] */
26#define WM8903_MICDET_THR_WIDTH 3 /* MICDET_THR - [6:4] */
27#define WM8903_MICSHORT_THR_MASK 0x000C /* MICSHORT_THR - [3:2] */ 23#define WM8903_MICSHORT_THR_MASK 0x000C /* MICSHORT_THR - [3:2] */
28#define WM8903_MICSHORT_THR_SHIFT 2 /* MICSHORT_THR - [3:2] */ 24#define WM8903_MICSHORT_THR_SHIFT 2 /* MICSHORT_THR - [3:2] */
29#define WM8903_MICSHORT_THR_WIDTH 2 /* MICSHORT_THR - [3:2] */ 25#define WM8903_MICSHORT_THR_WIDTH 2 /* MICSHORT_THR - [3:2] */
diff --git a/include/target/target_core_transport.h b/include/target/target_core_transport.h
index 246940511579..2e8ec51f0615 100644
--- a/include/target/target_core_transport.h
+++ b/include/target/target_core_transport.h
@@ -135,6 +135,8 @@ extern void transport_complete_task(struct se_task *, int);
135extern void transport_add_task_to_execute_queue(struct se_task *, 135extern void transport_add_task_to_execute_queue(struct se_task *,
136 struct se_task *, 136 struct se_task *,
137 struct se_device *); 137 struct se_device *);
138extern void transport_remove_task_from_execute_queue(struct se_task *,
139 struct se_device *);
138unsigned char *transport_dump_cmd_direction(struct se_cmd *); 140unsigned char *transport_dump_cmd_direction(struct se_cmd *);
139extern void transport_dump_dev_state(struct se_device *, char *, int *); 141extern void transport_dump_dev_state(struct se_device *, char *, int *);
140extern void transport_dump_dev_info(struct se_device *, struct se_lun *, 142extern void transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index aba421d68f6f..78f18adb49c8 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
31 0 : blk_rq_sectors(rq); 31 0 : blk_rq_sectors(rq);
32 __entry->errors = rq->errors; 32 __entry->errors = rq->errors;
33 33
34 blk_fill_rwbs_rq(__entry->rwbs, rq); 34 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
35 blk_dump_cmd(__get_str(cmd), rq); 35 blk_dump_cmd(__get_str(cmd), rq);
36 ), 36 ),
37 37
@@ -118,7 +118,7 @@ DECLARE_EVENT_CLASS(block_rq,
118 __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 118 __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
119 blk_rq_bytes(rq) : 0; 119 blk_rq_bytes(rq) : 0;
120 120
121 blk_fill_rwbs_rq(__entry->rwbs, rq); 121 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
122 blk_dump_cmd(__get_str(cmd), rq); 122 blk_dump_cmd(__get_str(cmd), rq);
123 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 123 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
124 ), 124 ),
@@ -563,7 +563,7 @@ TRACE_EVENT(block_rq_remap,
563 __entry->nr_sector = blk_rq_sectors(rq); 563 __entry->nr_sector = blk_rq_sectors(rq);
564 __entry->old_dev = dev; 564 __entry->old_dev = dev;
565 __entry->old_sector = from; 565 __entry->old_sector = from;
566 blk_fill_rwbs_rq(__entry->rwbs, rq); 566 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
567 ), 567 ),
568 568
569 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", 569 TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 7eee77895cb3..4cbbcef6baa8 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -17,36 +17,36 @@ TRACE_EVENT(mce_record,
17 TP_STRUCT__entry( 17 TP_STRUCT__entry(
18 __field( u64, mcgcap ) 18 __field( u64, mcgcap )
19 __field( u64, mcgstatus ) 19 __field( u64, mcgstatus )
20 __field( u8, bank )
21 __field( u64, status ) 20 __field( u64, status )
22 __field( u64, addr ) 21 __field( u64, addr )
23 __field( u64, misc ) 22 __field( u64, misc )
24 __field( u64, ip ) 23 __field( u64, ip )
25 __field( u8, cs )
26 __field( u64, tsc ) 24 __field( u64, tsc )
27 __field( u64, walltime ) 25 __field( u64, walltime )
28 __field( u32, cpu ) 26 __field( u32, cpu )
29 __field( u32, cpuid ) 27 __field( u32, cpuid )
30 __field( u32, apicid ) 28 __field( u32, apicid )
31 __field( u32, socketid ) 29 __field( u32, socketid )
30 __field( u8, cs )
31 __field( u8, bank )
32 __field( u8, cpuvendor ) 32 __field( u8, cpuvendor )
33 ), 33 ),
34 34
35 TP_fast_assign( 35 TP_fast_assign(
36 __entry->mcgcap = m->mcgcap; 36 __entry->mcgcap = m->mcgcap;
37 __entry->mcgstatus = m->mcgstatus; 37 __entry->mcgstatus = m->mcgstatus;
38 __entry->bank = m->bank;
39 __entry->status = m->status; 38 __entry->status = m->status;
40 __entry->addr = m->addr; 39 __entry->addr = m->addr;
41 __entry->misc = m->misc; 40 __entry->misc = m->misc;
42 __entry->ip = m->ip; 41 __entry->ip = m->ip;
43 __entry->cs = m->cs;
44 __entry->tsc = m->tsc; 42 __entry->tsc = m->tsc;
45 __entry->walltime = m->time; 43 __entry->walltime = m->time;
46 __entry->cpu = m->extcpu; 44 __entry->cpu = m->extcpu;
47 __entry->cpuid = m->cpuid; 45 __entry->cpuid = m->cpuid;
48 __entry->apicid = m->apicid; 46 __entry->apicid = m->apicid;
49 __entry->socketid = m->socketid; 47 __entry->socketid = m->socketid;
48 __entry->cs = m->cs;
49 __entry->bank = m->bank;
50 __entry->cpuvendor = m->cpuvendor; 50 __entry->cpuvendor = m->cpuvendor;
51 ), 51 ),
52 52
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index c6bae36547e5..21a546d27c0c 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -108,14 +108,14 @@ TRACE_EVENT(module_request,
108 TP_ARGS(name, wait, ip), 108 TP_ARGS(name, wait, ip),
109 109
110 TP_STRUCT__entry( 110 TP_STRUCT__entry(
111 __field( bool, wait )
112 __field( unsigned long, ip ) 111 __field( unsigned long, ip )
112 __field( bool, wait )
113 __string( name, name ) 113 __string( name, name )
114 ), 114 ),
115 115
116 TP_fast_assign( 116 TP_fast_assign(
117 __entry->wait = wait;
118 __entry->ip = ip; 117 __entry->ip = ip;
118 __entry->wait = wait;
119 __assign_str(name, name); 119 __assign_str(name, name);
120 ), 120 ),
121 121
@@ -129,4 +129,3 @@ TRACE_EVENT(module_request,
129 129
130/* This part must be outside protection */ 130/* This part must be outside protection */
131#include <trace/define_trace.h> 131#include <trace/define_trace.h>
132
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index f10293c41b1e..0c68ae22da22 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -19,14 +19,14 @@ TRACE_EVENT(kfree_skb,
19 19
20 TP_STRUCT__entry( 20 TP_STRUCT__entry(
21 __field( void *, skbaddr ) 21 __field( void *, skbaddr )
22 __field( unsigned short, protocol )
23 __field( void *, location ) 22 __field( void *, location )
23 __field( unsigned short, protocol )
24 ), 24 ),
25 25
26 TP_fast_assign( 26 TP_fast_assign(
27 __entry->skbaddr = skb; 27 __entry->skbaddr = skb;
28 __entry->protocol = ntohs(skb->protocol);
29 __entry->location = location; 28 __entry->location = location;
29 __entry->protocol = ntohs(skb->protocol);
30 ), 30 ),
31 31
32 TP_printk("skbaddr=%p protocol=%u location=%p", 32 TP_printk("skbaddr=%p protocol=%u location=%p",
diff --git a/include/xen/events.h b/include/xen/events.h
index 00f53ddcc062..962da2ced5b4 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -75,11 +75,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
75int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); 75int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
76 76
77#ifdef CONFIG_PCI_MSI 77#ifdef CONFIG_PCI_MSI
78/* Allocate an irq and a pirq to be used with MSIs. */ 78int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
79#define XEN_ALLOC_PIRQ (1 << 0) 79int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
80#define XEN_ALLOC_IRQ (1 << 1) 80 int pirq, int vector, const char *name);
81void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask);
82int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
83#endif 81#endif
84 82
85/* De-allocates the above mentioned physical interrupt. */ 83/* De-allocates the above mentioned physical interrupt. */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index c2d1fa4dc1ee..61e523af3c46 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t;
51 */ 51 */
52#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 52#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
53 53
54struct blkif_request { 54struct blkif_request_rw {
55 uint8_t operation; /* BLKIF_OP_??? */
56 uint8_t nr_segments; /* number of segments */
57 blkif_vdev_t handle; /* only for read/write requests */
58 uint64_t id; /* private guest value, echoed in resp */
59 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 55 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
60 struct blkif_request_segment { 56 struct blkif_request_segment {
61 grant_ref_t gref; /* reference to I/O buffer frame */ 57 grant_ref_t gref; /* reference to I/O buffer frame */
@@ -65,6 +61,16 @@ struct blkif_request {
65 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 61 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
66}; 62};
67 63
64struct blkif_request {
65 uint8_t operation; /* BLKIF_OP_??? */
66 uint8_t nr_segments; /* number of segments */
67 blkif_vdev_t handle; /* only for read/write requests */
68 uint64_t id; /* private guest value, echoed in resp */
69 union {
70 struct blkif_request_rw rw;
71 } u;
72};
73
68struct blkif_response { 74struct blkif_response {
69 uint64_t id; /* copied from request */ 75 uint64_t id; /* copied from request */
70 uint8_t operation; /* copied from request */ 76 uint8_t operation; /* copied from request */
@@ -91,4 +97,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
91#define VDISK_REMOVABLE 0x2 97#define VDISK_REMOVABLE 0x2
92#define VDISK_READONLY 0x4 98#define VDISK_READONLY 0x4
93 99
100/* Xen-defined major numbers for virtual disks, they look strangely
101 * familiar */
102#define XEN_IDE0_MAJOR 3
103#define XEN_IDE1_MAJOR 22
104#define XEN_SCSI_DISK0_MAJOR 8
105#define XEN_SCSI_DISK1_MAJOR 65
106#define XEN_SCSI_DISK2_MAJOR 66
107#define XEN_SCSI_DISK3_MAJOR 67
108#define XEN_SCSI_DISK4_MAJOR 68
109#define XEN_SCSI_DISK5_MAJOR 69
110#define XEN_SCSI_DISK6_MAJOR 70
111#define XEN_SCSI_DISK7_MAJOR 71
112#define XEN_SCSI_DISK8_MAJOR 128
113#define XEN_SCSI_DISK9_MAJOR 129
114#define XEN_SCSI_DISK10_MAJOR 130
115#define XEN_SCSI_DISK11_MAJOR 131
116#define XEN_SCSI_DISK12_MAJOR 132
117#define XEN_SCSI_DISK13_MAJOR 133
118#define XEN_SCSI_DISK14_MAJOR 134
119#define XEN_SCSI_DISK15_MAJOR 135
120
94#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ 121#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e2f1bc..b33257bc7e83 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -30,7 +30,7 @@
30#define __HYPERVISOR_stack_switch 3 30#define __HYPERVISOR_stack_switch 3
31#define __HYPERVISOR_set_callbacks 4 31#define __HYPERVISOR_set_callbacks 4
32#define __HYPERVISOR_fpu_taskswitch 5 32#define __HYPERVISOR_fpu_taskswitch 5
33#define __HYPERVISOR_sched_op 6 33#define __HYPERVISOR_sched_op_compat 6
34#define __HYPERVISOR_dom0_op 7 34#define __HYPERVISOR_dom0_op 7
35#define __HYPERVISOR_set_debugreg 8 35#define __HYPERVISOR_set_debugreg 8
36#define __HYPERVISOR_get_debugreg 9 36#define __HYPERVISOR_get_debugreg 9
@@ -52,7 +52,7 @@
52#define __HYPERVISOR_mmuext_op 26 52#define __HYPERVISOR_mmuext_op 26
53#define __HYPERVISOR_acm_op 27 53#define __HYPERVISOR_acm_op 27
54#define __HYPERVISOR_nmi_op 28 54#define __HYPERVISOR_nmi_op 28
55#define __HYPERVISOR_sched_op_new 29 55#define __HYPERVISOR_sched_op 29
56#define __HYPERVISOR_callback_op 30 56#define __HYPERVISOR_callback_op 30
57#define __HYPERVISOR_xenoprof_op 31 57#define __HYPERVISOR_xenoprof_op 31
58#define __HYPERVISOR_event_channel_op 32 58#define __HYPERVISOR_event_channel_op 32
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 98b92154a264..03c85d7387fb 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,9 +5,9 @@
5 5
6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); 6DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
7 7
8void xen_pre_suspend(void); 8void xen_arch_pre_suspend(void);
9void xen_post_suspend(int suspend_cancelled); 9void xen_arch_post_suspend(int suspend_cancelled);
10void xen_hvm_post_suspend(int suspend_cancelled); 10void xen_arch_hvm_post_suspend(int suspend_cancelled);
11 11
12void xen_mm_pin_all(void); 12void xen_mm_pin_all(void);
13void xen_mm_unpin_all(void); 13void xen_mm_unpin_all(void);
diff --git a/init/Kconfig b/init/Kconfig
index be788c0957d4..5721d27af626 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -287,6 +287,18 @@ config BSD_PROCESS_ACCT_V3
287 for processing it. A preliminary version of these tools is available 287 for processing it. A preliminary version of these tools is available
288 at <http://www.gnu.org/software/acct/>. 288 at <http://www.gnu.org/software/acct/>.
289 289
290config FHANDLE
291 bool "open by fhandle syscalls"
292 select EXPORTFS
293 help
294 If you say Y here, a user level program will be able to map
295 file names to handle and then later use the handle for
296 different file system operations. This is useful in implementing
297 userspace file servers, which now track files using handles instead
298 of names. The handle would remain the same even if file names
299 get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
300 syscalls.
301
290config TASKSTATS 302config TASKSTATS
291 bool "Export task/process statistics through netlink (EXPERIMENTAL)" 303 bool "Export task/process statistics through netlink (EXPERIMENTAL)"
292 depends on NET 304 depends on NET
@@ -683,6 +695,16 @@ config CGROUP_MEM_RES_CTLR_SWAP_ENABLED
683 select this option (if, for some reason, they need to disable it 695 select this option (if, for some reason, they need to disable it
684 then noswapaccount does the trick). 696 then noswapaccount does the trick).
685 697
698config CGROUP_PERF
699 bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
700 depends on PERF_EVENTS && CGROUPS
701 help
702 This option extends the per-cpu mode to restrict monitoring to
703 threads which belong to the cgroup specified and run on the
704 designated cpu.
705
706 Say N if unsure.
707
686menuconfig CGROUP_SCHED 708menuconfig CGROUP_SCHED
687 bool "Group CPU scheduler" 709 bool "Group CPU scheduler"
688 depends on EXPERIMENTAL 710 depends on EXPERIMENTAL
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d2e3c7866460..e683869365d9 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
144} 144}
145 145
146/* Initialize a parent watch entry. */ 146/* Initialize a parent watch entry. */
147static struct audit_parent *audit_init_parent(struct nameidata *ndp) 147static struct audit_parent *audit_init_parent(struct path *path)
148{ 148{
149 struct inode *inode = ndp->path.dentry->d_inode; 149 struct inode *inode = path->dentry->d_inode;
150 struct audit_parent *parent; 150 struct audit_parent *parent;
151 int ret; 151 int ret;
152 152
@@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
353} 353}
354 354
355/* Get path information necessary for adding watches. */ 355/* Get path information necessary for adding watches. */
356static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw) 356static int audit_get_nd(struct audit_watch *watch, struct path *parent)
357{ 357{
358 struct nameidata *ndparent, *ndwatch; 358 struct nameidata nd;
359 struct dentry *d;
359 int err; 360 int err;
360 361
361 ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL); 362 err = kern_path_parent(watch->path, &nd);
362 if (unlikely(!ndparent)) 363 if (err)
363 return -ENOMEM; 364 return err;
364 365
365 ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL); 366 if (nd.last_type != LAST_NORM) {
366 if (unlikely(!ndwatch)) { 367 path_put(&nd.path);
367 kfree(ndparent); 368 return -EINVAL;
368 return -ENOMEM;
369 } 369 }
370 370
371 err = path_lookup(path, LOOKUP_PARENT, ndparent); 371 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
372 if (err) { 372 d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
373 kfree(ndparent); 373 if (IS_ERR(d)) {
374 kfree(ndwatch); 374 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
375 return err; 375 path_put(&nd.path);
376 return PTR_ERR(d);
376 } 377 }
377 378 if (d->d_inode) {
378 err = path_lookup(path, 0, ndwatch); 379 /* update watch filter fields */
379 if (err) { 380 watch->dev = d->d_inode->i_sb->s_dev;
380 kfree(ndwatch); 381 watch->ino = d->d_inode->i_ino;
381 ndwatch = NULL;
382 } 382 }
383 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
383 384
384 *ndp = ndparent; 385 *parent = nd.path;
385 *ndw = ndwatch; 386 dput(d);
386
387 return 0; 387 return 0;
388} 388}
389 389
390/* Release resources used for watch path information. */
391static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
392{
393 if (ndp) {
394 path_put(&ndp->path);
395 kfree(ndp);
396 }
397 if (ndw) {
398 path_put(&ndw->path);
399 kfree(ndw);
400 }
401}
402
403/* Associate the given rule with an existing parent. 390/* Associate the given rule with an existing parent.
404 * Caller must hold audit_filter_mutex. */ 391 * Caller must hold audit_filter_mutex. */
405static void audit_add_to_parent(struct audit_krule *krule, 392static void audit_add_to_parent(struct audit_krule *krule,
@@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
440{ 427{
441 struct audit_watch *watch = krule->watch; 428 struct audit_watch *watch = krule->watch;
442 struct audit_parent *parent; 429 struct audit_parent *parent;
443 struct nameidata *ndp = NULL, *ndw = NULL; 430 struct path parent_path;
444 int h, ret = 0; 431 int h, ret = 0;
445 432
446 mutex_unlock(&audit_filter_mutex); 433 mutex_unlock(&audit_filter_mutex);
447 434
448 /* Avoid calling path_lookup under audit_filter_mutex. */ 435 /* Avoid calling path_lookup under audit_filter_mutex. */
449 ret = audit_get_nd(watch->path, &ndp, &ndw); 436 ret = audit_get_nd(watch, &parent_path);
450 if (ret) {
451 /* caller expects mutex locked */
452 mutex_lock(&audit_filter_mutex);
453 goto error;
454 }
455 437
438 /* caller expects mutex locked */
456 mutex_lock(&audit_filter_mutex); 439 mutex_lock(&audit_filter_mutex);
457 440
458 /* update watch filter fields */ 441 if (ret)
459 if (ndw) { 442 return ret;
460 watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
461 watch->ino = ndw->path.dentry->d_inode->i_ino;
462 }
463 443
464 /* either find an old parent or attach a new one */ 444 /* either find an old parent or attach a new one */
465 parent = audit_find_parent(ndp->path.dentry->d_inode); 445 parent = audit_find_parent(parent_path.dentry->d_inode);
466 if (!parent) { 446 if (!parent) {
467 parent = audit_init_parent(ndp); 447 parent = audit_init_parent(&parent_path);
468 if (IS_ERR(parent)) { 448 if (IS_ERR(parent)) {
469 ret = PTR_ERR(parent); 449 ret = PTR_ERR(parent);
470 goto error; 450 goto error;
@@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
479 h = audit_hash_ino((u32)watch->ino); 459 h = audit_hash_ino((u32)watch->ino);
480 *list = &audit_inode_hash[h]; 460 *list = &audit_inode_hash[h];
481error: 461error:
482 audit_put_nd(ndp, ndw); /* NULL args OK */ 462 path_put(&parent_path);
483 return ret; 463 return ret;
484
485} 464}
486 465
487void audit_remove_watch_rule(struct audit_krule *krule) 466void audit_remove_watch_rule(struct audit_krule *krule)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b24d7027b83c..95362d15128c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4230,20 +4230,8 @@ void cgroup_post_fork(struct task_struct *child)
4230 */ 4230 */
4231void cgroup_exit(struct task_struct *tsk, int run_callbacks) 4231void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4232{ 4232{
4233 int i;
4234 struct css_set *cg; 4233 struct css_set *cg;
4235 4234 int i;
4236 if (run_callbacks && need_forkexit_callback) {
4237 /*
4238 * modular subsystems can't use callbacks, so no need to lock
4239 * the subsys array
4240 */
4241 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4242 struct cgroup_subsys *ss = subsys[i];
4243 if (ss->exit)
4244 ss->exit(ss, tsk);
4245 }
4246 }
4247 4235
4248 /* 4236 /*
4249 * Unlink from the css_set task list if necessary. 4237 * Unlink from the css_set task list if necessary.
@@ -4261,7 +4249,24 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4261 task_lock(tsk); 4249 task_lock(tsk);
4262 cg = tsk->cgroups; 4250 cg = tsk->cgroups;
4263 tsk->cgroups = &init_css_set; 4251 tsk->cgroups = &init_css_set;
4252
4253 if (run_callbacks && need_forkexit_callback) {
4254 /*
4255 * modular subsystems can't use callbacks, so no need to lock
4256 * the subsys array
4257 */
4258 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4259 struct cgroup_subsys *ss = subsys[i];
4260 if (ss->exit) {
4261 struct cgroup *old_cgrp =
4262 rcu_dereference_raw(cg->subsys[i])->cgroup;
4263 struct cgroup *cgrp = task_cgroup(tsk, i);
4264 ss->exit(ss, cgrp, old_cgrp, tsk);
4265 }
4266 }
4267 }
4264 task_unlock(tsk); 4268 task_unlock(tsk);
4269
4265 if (cg) 4270 if (cg)
4266 put_css_set_taskexit(cg); 4271 put_css_set_taskexit(cg);
4267} 4272}
@@ -4813,6 +4818,29 @@ css_get_next(struct cgroup_subsys *ss, int id,
4813 return ret; 4818 return ret;
4814} 4819}
4815 4820
4821/*
4822 * get corresponding css from file open on cgroupfs directory
4823 */
4824struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
4825{
4826 struct cgroup *cgrp;
4827 struct inode *inode;
4828 struct cgroup_subsys_state *css;
4829
4830 inode = f->f_dentry->d_inode;
4831 /* check in cgroup filesystem dir */
4832 if (inode->i_op != &cgroup_dir_inode_operations)
4833 return ERR_PTR(-EBADF);
4834
4835 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
4836 return ERR_PTR(-EINVAL);
4837
4838 /* get cgroup */
4839 cgrp = __d_cgrp(f->f_dentry);
4840 css = cgrp->subsys[id];
4841 return css ? css : ERR_PTR(-ENOENT);
4842}
4843
4816#ifdef CONFIG_CGROUP_DEBUG 4844#ifdef CONFIG_CGROUP_DEBUG
4817static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, 4845static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
4818 struct cgroup *cont) 4846 struct cgroup *cont)
diff --git a/kernel/compat.c b/kernel/compat.c
index c9e2ec0b34a8..38b1d2c1cbe8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -52,6 +52,64 @@ static int compat_put_timeval(struct compat_timeval __user *o,
52 put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0; 52 put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
53} 53}
54 54
55static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp)
56{
57 memset(txc, 0, sizeof(struct timex));
58
59 if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
60 __get_user(txc->modes, &utp->modes) ||
61 __get_user(txc->offset, &utp->offset) ||
62 __get_user(txc->freq, &utp->freq) ||
63 __get_user(txc->maxerror, &utp->maxerror) ||
64 __get_user(txc->esterror, &utp->esterror) ||
65 __get_user(txc->status, &utp->status) ||
66 __get_user(txc->constant, &utp->constant) ||
67 __get_user(txc->precision, &utp->precision) ||
68 __get_user(txc->tolerance, &utp->tolerance) ||
69 __get_user(txc->time.tv_sec, &utp->time.tv_sec) ||
70 __get_user(txc->time.tv_usec, &utp->time.tv_usec) ||
71 __get_user(txc->tick, &utp->tick) ||
72 __get_user(txc->ppsfreq, &utp->ppsfreq) ||
73 __get_user(txc->jitter, &utp->jitter) ||
74 __get_user(txc->shift, &utp->shift) ||
75 __get_user(txc->stabil, &utp->stabil) ||
76 __get_user(txc->jitcnt, &utp->jitcnt) ||
77 __get_user(txc->calcnt, &utp->calcnt) ||
78 __get_user(txc->errcnt, &utp->errcnt) ||
79 __get_user(txc->stbcnt, &utp->stbcnt))
80 return -EFAULT;
81
82 return 0;
83}
84
85static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc)
86{
87 if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
88 __put_user(txc->modes, &utp->modes) ||
89 __put_user(txc->offset, &utp->offset) ||
90 __put_user(txc->freq, &utp->freq) ||
91 __put_user(txc->maxerror, &utp->maxerror) ||
92 __put_user(txc->esterror, &utp->esterror) ||
93 __put_user(txc->status, &utp->status) ||
94 __put_user(txc->constant, &utp->constant) ||
95 __put_user(txc->precision, &utp->precision) ||
96 __put_user(txc->tolerance, &utp->tolerance) ||
97 __put_user(txc->time.tv_sec, &utp->time.tv_sec) ||
98 __put_user(txc->time.tv_usec, &utp->time.tv_usec) ||
99 __put_user(txc->tick, &utp->tick) ||
100 __put_user(txc->ppsfreq, &utp->ppsfreq) ||
101 __put_user(txc->jitter, &utp->jitter) ||
102 __put_user(txc->shift, &utp->shift) ||
103 __put_user(txc->stabil, &utp->stabil) ||
104 __put_user(txc->jitcnt, &utp->jitcnt) ||
105 __put_user(txc->calcnt, &utp->calcnt) ||
106 __put_user(txc->errcnt, &utp->errcnt) ||
107 __put_user(txc->stbcnt, &utp->stbcnt) ||
108 __put_user(txc->tai, &utp->tai))
109 return -EFAULT;
110 return 0;
111}
112
55asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, 113asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
56 struct timezone __user *tz) 114 struct timezone __user *tz)
57{ 115{
@@ -617,6 +675,29 @@ long compat_sys_clock_gettime(clockid_t which_clock,
617 return err; 675 return err;
618} 676}
619 677
678long compat_sys_clock_adjtime(clockid_t which_clock,
679 struct compat_timex __user *utp)
680{
681 struct timex txc;
682 mm_segment_t oldfs;
683 int err, ret;
684
685 err = compat_get_timex(&txc, utp);
686 if (err)
687 return err;
688
689 oldfs = get_fs();
690 set_fs(KERNEL_DS);
691 ret = sys_clock_adjtime(which_clock, (struct timex __user *) &txc);
692 set_fs(oldfs);
693
694 err = compat_put_timex(utp, &txc);
695 if (err)
696 return err;
697
698 return ret;
699}
700
620long compat_sys_clock_getres(clockid_t which_clock, 701long compat_sys_clock_getres(clockid_t which_clock,
621 struct compat_timespec __user *tp) 702 struct compat_timespec __user *tp)
622{ 703{
@@ -951,58 +1032,17 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat
951asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) 1032asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
952{ 1033{
953 struct timex txc; 1034 struct timex txc;
954 int ret; 1035 int err, ret;
955
956 memset(&txc, 0, sizeof(struct timex));
957 1036
958 if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) || 1037 err = compat_get_timex(&txc, utp);
959 __get_user(txc.modes, &utp->modes) || 1038 if (err)
960 __get_user(txc.offset, &utp->offset) || 1039 return err;
961 __get_user(txc.freq, &utp->freq) ||
962 __get_user(txc.maxerror, &utp->maxerror) ||
963 __get_user(txc.esterror, &utp->esterror) ||
964 __get_user(txc.status, &utp->status) ||
965 __get_user(txc.constant, &utp->constant) ||
966 __get_user(txc.precision, &utp->precision) ||
967 __get_user(txc.tolerance, &utp->tolerance) ||
968 __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
969 __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
970 __get_user(txc.tick, &utp->tick) ||
971 __get_user(txc.ppsfreq, &utp->ppsfreq) ||
972 __get_user(txc.jitter, &utp->jitter) ||
973 __get_user(txc.shift, &utp->shift) ||
974 __get_user(txc.stabil, &utp->stabil) ||
975 __get_user(txc.jitcnt, &utp->jitcnt) ||
976 __get_user(txc.calcnt, &utp->calcnt) ||
977 __get_user(txc.errcnt, &utp->errcnt) ||
978 __get_user(txc.stbcnt, &utp->stbcnt))
979 return -EFAULT;
980 1040
981 ret = do_adjtimex(&txc); 1041 ret = do_adjtimex(&txc);
982 1042
983 if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) || 1043 err = compat_put_timex(utp, &txc);
984 __put_user(txc.modes, &utp->modes) || 1044 if (err)
985 __put_user(txc.offset, &utp->offset) || 1045 return err;
986 __put_user(txc.freq, &utp->freq) ||
987 __put_user(txc.maxerror, &utp->maxerror) ||
988 __put_user(txc.esterror, &utp->esterror) ||
989 __put_user(txc.status, &utp->status) ||
990 __put_user(txc.constant, &utp->constant) ||
991 __put_user(txc.precision, &utp->precision) ||
992 __put_user(txc.tolerance, &utp->tolerance) ||
993 __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
994 __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
995 __put_user(txc.tick, &utp->tick) ||
996 __put_user(txc.ppsfreq, &utp->ppsfreq) ||
997 __put_user(txc.jitter, &utp->jitter) ||
998 __put_user(txc.shift, &utp->shift) ||
999 __put_user(txc.stabil, &utp->stabil) ||
1000 __put_user(txc.jitcnt, &utp->jitcnt) ||
1001 __put_user(txc.calcnt, &utp->calcnt) ||
1002 __put_user(txc.errcnt, &utp->errcnt) ||
1003 __put_user(txc.stbcnt, &utp->stbcnt) ||
1004 __put_user(txc.tai, &utp->tai))
1005 ret = -EFAULT;
1006 1046
1007 return ret; 1047 return ret;
1008} 1048}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4349935c2ad8..e92e98189032 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1575,8 +1575,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1575 return -ENODEV; 1575 return -ENODEV;
1576 1576
1577 trialcs = alloc_trial_cpuset(cs); 1577 trialcs = alloc_trial_cpuset(cs);
1578 if (!trialcs) 1578 if (!trialcs) {
1579 return -ENOMEM; 1579 retval = -ENOMEM;
1580 goto out;
1581 }
1580 1582
1581 switch (cft->private) { 1583 switch (cft->private) {
1582 case FILE_CPULIST: 1584 case FILE_CPULIST:
@@ -1591,6 +1593,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
1591 } 1593 }
1592 1594
1593 free_trial_cpuset(trialcs); 1595 free_trial_cpuset(trialcs);
1596out:
1594 cgroup_unlock(); 1597 cgroup_unlock();
1595 return retval; 1598 return retval;
1596} 1599}
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a9d6dd53a6c..2343c132c5a7 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar;
35static struct thread_group_cred init_tgcred = { 35static struct thread_group_cred init_tgcred = {
36 .usage = ATOMIC_INIT(2), 36 .usage = ATOMIC_INIT(2),
37 .tgid = 0, 37 .tgid = 0,
38 .lock = SPIN_LOCK_UNLOCKED, 38 .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
39}; 39};
40#endif 40#endif
41 41
diff --git a/kernel/futex.c b/kernel/futex.c
index b766d28accd6..bda415715382 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -381,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
381 return NULL; 381 return NULL;
382} 382}
383 383
384static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 384static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
385 u32 uval, u32 newval)
385{ 386{
386 u32 curval; 387 int ret;
387 388
388 pagefault_disable(); 389 pagefault_disable();
389 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); 390 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
390 pagefault_enable(); 391 pagefault_enable();
391 392
392 return curval; 393 return ret;
393} 394}
394 395
395static int get_futex_value_locked(u32 *dest, u32 __user *from) 396static int get_futex_value_locked(u32 *dest, u32 __user *from)
@@ -674,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
674 struct task_struct *task, int set_waiters) 675 struct task_struct *task, int set_waiters)
675{ 676{
676 int lock_taken, ret, ownerdied = 0; 677 int lock_taken, ret, ownerdied = 0;
677 u32 uval, newval, curval; 678 u32 uval, newval, curval, vpid = task_pid_vnr(task);
678 679
679retry: 680retry:
680 ret = lock_taken = 0; 681 ret = lock_taken = 0;
@@ -684,19 +685,17 @@ retry:
684 * (by doing a 0 -> TID atomic cmpxchg), while holding all 685 * (by doing a 0 -> TID atomic cmpxchg), while holding all
685 * the locks. It will most likely not succeed. 686 * the locks. It will most likely not succeed.
686 */ 687 */
687 newval = task_pid_vnr(task); 688 newval = vpid;
688 if (set_waiters) 689 if (set_waiters)
689 newval |= FUTEX_WAITERS; 690 newval |= FUTEX_WAITERS;
690 691
691 curval = cmpxchg_futex_value_locked(uaddr, 0, newval); 692 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
692
693 if (unlikely(curval == -EFAULT))
694 return -EFAULT; 693 return -EFAULT;
695 694
696 /* 695 /*
697 * Detect deadlocks. 696 * Detect deadlocks.
698 */ 697 */
699 if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) 698 if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
700 return -EDEADLK; 699 return -EDEADLK;
701 700
702 /* 701 /*
@@ -723,14 +722,12 @@ retry:
723 */ 722 */
724 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { 723 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
725 /* Keep the OWNER_DIED bit */ 724 /* Keep the OWNER_DIED bit */
726 newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); 725 newval = (curval & ~FUTEX_TID_MASK) | vpid;
727 ownerdied = 0; 726 ownerdied = 0;
728 lock_taken = 1; 727 lock_taken = 1;
729 } 728 }
730 729
731 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 730 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
732
733 if (unlikely(curval == -EFAULT))
734 return -EFAULT; 731 return -EFAULT;
735 if (unlikely(curval != uval)) 732 if (unlikely(curval != uval))
736 goto retry; 733 goto retry;
@@ -775,6 +772,24 @@ retry:
775 return ret; 772 return ret;
776} 773}
777 774
775/**
776 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
777 * @q: The futex_q to unqueue
778 *
779 * The q->lock_ptr must not be NULL and must be held by the caller.
780 */
781static void __unqueue_futex(struct futex_q *q)
782{
783 struct futex_hash_bucket *hb;
784
785 if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
786 || plist_node_empty(&q->list)))
787 return;
788
789 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
790 plist_del(&q->list, &hb->chain);
791}
792
778/* 793/*
779 * The hash bucket lock must be held when this is called. 794 * The hash bucket lock must be held when this is called.
780 * Afterwards, the futex_q must not be accessed. 795 * Afterwards, the futex_q must not be accessed.
@@ -792,7 +807,7 @@ static void wake_futex(struct futex_q *q)
792 */ 807 */
793 get_task_struct(p); 808 get_task_struct(p);
794 809
795 plist_del(&q->list, &q->list.plist); 810 __unqueue_futex(q);
796 /* 811 /*
797 * The waiting task can free the futex_q as soon as 812 * The waiting task can free the futex_q as soon as
798 * q->lock_ptr = NULL is written, without taking any locks. A 813 * q->lock_ptr = NULL is written, without taking any locks. A
@@ -843,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
843 858
844 newval = FUTEX_WAITERS | task_pid_vnr(new_owner); 859 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
845 860
846 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 861 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
847
848 if (curval == -EFAULT)
849 ret = -EFAULT; 862 ret = -EFAULT;
850 else if (curval != uval) 863 else if (curval != uval)
851 ret = -EINVAL; 864 ret = -EINVAL;
@@ -880,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
880 * There is no waiter, so we unlock the futex. The owner died 893 * There is no waiter, so we unlock the futex. The owner died
881 * bit has not to be preserved here. We are the owner: 894 * bit has not to be preserved here. We are the owner:
882 */ 895 */
883 oldval = cmpxchg_futex_value_locked(uaddr, uval, 0); 896 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
884 897 return -EFAULT;
885 if (oldval == -EFAULT)
886 return oldval;
887 if (oldval != uval) 898 if (oldval != uval)
888 return -EAGAIN; 899 return -EAGAIN;
889 900
@@ -1071,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1071 plist_del(&q->list, &hb1->chain); 1082 plist_del(&q->list, &hb1->chain);
1072 plist_add(&q->list, &hb2->chain); 1083 plist_add(&q->list, &hb2->chain);
1073 q->lock_ptr = &hb2->lock; 1084 q->lock_ptr = &hb2->lock;
1074#ifdef CONFIG_DEBUG_PI_LIST
1075 q->list.plist.spinlock = &hb2->lock;
1076#endif
1077 } 1085 }
1078 get_futex_key_refs(key2); 1086 get_futex_key_refs(key2);
1079 q->key = *key2; 1087 q->key = *key2;
@@ -1100,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1100 get_futex_key_refs(key); 1108 get_futex_key_refs(key);
1101 q->key = *key; 1109 q->key = *key;
1102 1110
1103 WARN_ON(plist_node_empty(&q->list)); 1111 __unqueue_futex(q);
1104 plist_del(&q->list, &q->list.plist);
1105 1112
1106 WARN_ON(!q->rt_waiter); 1113 WARN_ON(!q->rt_waiter);
1107 q->rt_waiter = NULL; 1114 q->rt_waiter = NULL;
1108 1115
1109 q->lock_ptr = &hb->lock; 1116 q->lock_ptr = &hb->lock;
1110#ifdef CONFIG_DEBUG_PI_LIST
1111 q->list.plist.spinlock = &hb->lock;
1112#endif
1113 1117
1114 wake_up_state(q->task, TASK_NORMAL); 1118 wake_up_state(q->task, TASK_NORMAL);
1115} 1119}
@@ -1457,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1457 prio = min(current->normal_prio, MAX_RT_PRIO); 1461 prio = min(current->normal_prio, MAX_RT_PRIO);
1458 1462
1459 plist_node_init(&q->list, prio); 1463 plist_node_init(&q->list, prio);
1460#ifdef CONFIG_DEBUG_PI_LIST
1461 q->list.plist.spinlock = &hb->lock;
1462#endif
1463 plist_add(&q->list, &hb->chain); 1464 plist_add(&q->list, &hb->chain);
1464 q->task = current; 1465 q->task = current;
1465 spin_unlock(&hb->lock); 1466 spin_unlock(&hb->lock);
@@ -1504,8 +1505,7 @@ retry:
1504 spin_unlock(lock_ptr); 1505 spin_unlock(lock_ptr);
1505 goto retry; 1506 goto retry;
1506 } 1507 }
1507 WARN_ON(plist_node_empty(&q->list)); 1508 __unqueue_futex(q);
1508 plist_del(&q->list, &q->list.plist);
1509 1509
1510 BUG_ON(q->pi_state); 1510 BUG_ON(q->pi_state);
1511 1511
@@ -1525,8 +1525,7 @@ retry:
1525static void unqueue_me_pi(struct futex_q *q) 1525static void unqueue_me_pi(struct futex_q *q)
1526 __releases(q->lock_ptr) 1526 __releases(q->lock_ptr)
1527{ 1527{
1528 WARN_ON(plist_node_empty(&q->list)); 1528 __unqueue_futex(q);
1529 plist_del(&q->list, &q->list.plist);
1530 1529
1531 BUG_ON(!q->pi_state); 1530 BUG_ON(!q->pi_state);
1532 free_pi_state(q->pi_state); 1531 free_pi_state(q->pi_state);
@@ -1556,10 +1555,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1556 1555
1557 /* 1556 /*
1558 * We are here either because we stole the rtmutex from the 1557 * We are here either because we stole the rtmutex from the
1559 * pending owner or we are the pending owner which failed to 1558 * previous highest priority waiter or we are the highest priority
1560 * get the rtmutex. We have to replace the pending owner TID 1559 * waiter but failed to get the rtmutex the first time.
1561 * in the user space variable. This must be atomic as we have 1560 * We have to replace the newowner TID in the user space variable.
1562 * to preserve the owner died bit here. 1561 * This must be atomic as we have to preserve the owner died bit here.
1563 * 1562 *
1564 * Note: We write the user space value _before_ changing the pi_state 1563 * Note: We write the user space value _before_ changing the pi_state
1565 * because we can fault here. Imagine swapped out pages or a fork 1564 * because we can fault here. Imagine swapped out pages or a fork
@@ -1578,9 +1577,7 @@ retry:
1578 while (1) { 1577 while (1) {
1579 newval = (uval & FUTEX_OWNER_DIED) | newtid; 1578 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1580 1579
1581 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 1580 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1582
1583 if (curval == -EFAULT)
1584 goto handle_fault; 1581 goto handle_fault;
1585 if (curval == uval) 1582 if (curval == uval)
1586 break; 1583 break;
@@ -1608,8 +1605,8 @@ retry:
1608 1605
1609 /* 1606 /*
1610 * To handle the page fault we need to drop the hash bucket 1607 * To handle the page fault we need to drop the hash bucket
1611 * lock here. That gives the other task (either the pending 1608 * lock here. That gives the other task (either the highest priority
1612 * owner itself or the task which stole the rtmutex) the 1609 * waiter itself or the task which stole the rtmutex) the
1613 * chance to try the fixup of the pi_state. So once we are 1610 * chance to try the fixup of the pi_state. So once we are
1614 * back from handling the fault we need to check the pi_state 1611 * back from handling the fault we need to check the pi_state
1615 * after reacquiring the hash bucket lock and before trying to 1612 * after reacquiring the hash bucket lock and before trying to
@@ -1685,18 +1682,20 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1685 /* 1682 /*
1686 * pi_state is incorrect, some other task did a lock steal and 1683 * pi_state is incorrect, some other task did a lock steal and
1687 * we returned due to timeout or signal without taking the 1684 * we returned due to timeout or signal without taking the
1688 * rt_mutex. Too late. We can access the rt_mutex_owner without 1685 * rt_mutex. Too late.
1689 * locking, as the other task is now blocked on the hash bucket
1690 * lock. Fix the state up.
1691 */ 1686 */
1687 raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1692 owner = rt_mutex_owner(&q->pi_state->pi_mutex); 1688 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1689 if (!owner)
1690 owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1691 raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1693 ret = fixup_pi_state_owner(uaddr, q, owner); 1692 ret = fixup_pi_state_owner(uaddr, q, owner);
1694 goto out; 1693 goto out;
1695 } 1694 }
1696 1695
1697 /* 1696 /*
1698 * Paranoia check. If we did not take the lock, then we should not be 1697 * Paranoia check. If we did not take the lock, then we should not be
1699 * the owner, nor the pending owner, of the rt_mutex. 1698 * the owner of the rt_mutex.
1700 */ 1699 */
1701 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) 1700 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1702 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " 1701 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
@@ -1781,13 +1780,14 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1781 * 1780 *
1782 * The basic logical guarantee of a futex is that it blocks ONLY 1781 * The basic logical guarantee of a futex is that it blocks ONLY
1783 * if cond(var) is known to be true at the time of blocking, for 1782 * if cond(var) is known to be true at the time of blocking, for
1784 * any cond. If we queued after testing *uaddr, that would open 1783 * any cond. If we locked the hash-bucket after testing *uaddr, that
1785 * a race condition where we could block indefinitely with 1784 * would open a race condition where we could block indefinitely with
1786 * cond(var) false, which would violate the guarantee. 1785 * cond(var) false, which would violate the guarantee.
1787 * 1786 *
1788 * A consequence is that futex_wait() can return zero and absorb 1787 * On the other hand, we insert q and release the hash-bucket only
1789 * a wakeup when *uaddr != val on entry to the syscall. This is 1788 * after testing *uaddr. This guarantees that futex_wait() will NOT
1790 * rare, but normal. 1789 * absorb a wakeup if *uaddr does not match the desired values
1790 * while the syscall executes.
1791 */ 1791 */
1792retry: 1792retry:
1793 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); 1793 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
@@ -2046,9 +2046,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2046{ 2046{
2047 struct futex_hash_bucket *hb; 2047 struct futex_hash_bucket *hb;
2048 struct futex_q *this, *next; 2048 struct futex_q *this, *next;
2049 u32 uval;
2050 struct plist_head *head; 2049 struct plist_head *head;
2051 union futex_key key = FUTEX_KEY_INIT; 2050 union futex_key key = FUTEX_KEY_INIT;
2051 u32 uval, vpid = task_pid_vnr(current);
2052 int ret; 2052 int ret;
2053 2053
2054retry: 2054retry:
@@ -2057,7 +2057,7 @@ retry:
2057 /* 2057 /*
2058 * We release only a lock we actually own: 2058 * We release only a lock we actually own:
2059 */ 2059 */
2060 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 2060 if ((uval & FUTEX_TID_MASK) != vpid)
2061 return -EPERM; 2061 return -EPERM;
2062 2062
2063 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); 2063 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
@@ -2072,17 +2072,14 @@ retry:
2072 * again. If it succeeds then we can return without waking 2072 * again. If it succeeds then we can return without waking
2073 * anyone else up: 2073 * anyone else up:
2074 */ 2074 */
2075 if (!(uval & FUTEX_OWNER_DIED)) 2075 if (!(uval & FUTEX_OWNER_DIED) &&
2076 uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0); 2076 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2077
2078
2079 if (unlikely(uval == -EFAULT))
2080 goto pi_faulted; 2077 goto pi_faulted;
2081 /* 2078 /*
2082 * Rare case: we managed to release the lock atomically, 2079 * Rare case: we managed to release the lock atomically,
2083 * no need to wake anyone else up: 2080 * no need to wake anyone else up:
2084 */ 2081 */
2085 if (unlikely(uval == task_pid_vnr(current))) 2082 if (unlikely(uval == vpid))
2086 goto out_unlock; 2083 goto out_unlock;
2087 2084
2088 /* 2085 /*
@@ -2167,7 +2164,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2167 * We were woken prior to requeue by a timeout or a signal. 2164 * We were woken prior to requeue by a timeout or a signal.
2168 * Unqueue the futex_q and determine which it was. 2165 * Unqueue the futex_q and determine which it was.
2169 */ 2166 */
2170 plist_del(&q->list, &q->list.plist); 2167 plist_del(&q->list, &hb->chain);
2171 2168
2172 /* Handle spurious wakeups gracefully */ 2169 /* Handle spurious wakeups gracefully */
2173 ret = -EWOULDBLOCK; 2170 ret = -EWOULDBLOCK;
@@ -2463,11 +2460,20 @@ retry:
2463 * userspace. 2460 * userspace.
2464 */ 2461 */
2465 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 2462 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2466 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); 2463 /*
2467 2464 * We are not holding a lock here, but we want to have
2468 if (nval == -EFAULT) 2465 * the pagefault_disable/enable() protection because
2469 return -1; 2466 * we want to handle the fault gracefully. If the
2470 2467 * access fails we try to fault in the futex with R/W
2468 * verification via get_user_pages. get_user() above
2469 * does not guarantee R/W access. If that fails we
2470 * give up and leave the futex locked.
2471 */
2472 if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2473 if (fault_in_user_writeable(uaddr))
2474 return -1;
2475 goto retry;
2476 }
2471 if (nval != uval) 2477 if (nval != uval)
2472 goto retry; 2478 goto retry;
2473 2479
@@ -2678,8 +2684,7 @@ static int __init futex_init(void)
2678 * implementation, the non-functional ones will return 2684 * implementation, the non-functional ones will return
2679 * -ENOSYS. 2685 * -ENOSYS.
2680 */ 2686 */
2681 curval = cmpxchg_futex_value_locked(NULL, 0, 0); 2687 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2682 if (curval == -EFAULT)
2683 futex_cmpxchg_enabled = 1; 2688 futex_cmpxchg_enabled = 1;
2684 2689
2685 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { 2690 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0c8d7c048615..9017478c5d4c 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -53,11 +53,10 @@
53/* 53/*
54 * The timer bases: 54 * The timer bases:
55 * 55 *
56 * Note: If we want to add new timer bases, we have to skip the two 56 * There are more clockids then hrtimer bases. Thus, we index
57 * clock ids captured by the cpu-timers. We do this by holding empty 57 * into the timer bases by the hrtimer_base_type enum. When trying
58 * entries rather than doing math adjustment of the clock ids. 58 * to reach a base using a clockid, hrtimer_clockid_to_base()
59 * This ensures that we capture erroneous accesses to these clock ids 59 * is used to convert from clockid to the proper hrtimer_base_type.
60 * rather than moving them into the range of valid clock id's.
61 */ 60 */
62DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 61DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
63{ 62{
@@ -74,30 +73,39 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
74 .get_time = &ktime_get, 73 .get_time = &ktime_get,
75 .resolution = KTIME_LOW_RES, 74 .resolution = KTIME_LOW_RES,
76 }, 75 },
76 {
77 .index = CLOCK_BOOTTIME,
78 .get_time = &ktime_get_boottime,
79 .resolution = KTIME_LOW_RES,
80 },
77 } 81 }
78}; 82};
79 83
84static int hrtimer_clock_to_base_table[MAX_CLOCKS];
85
86static inline int hrtimer_clockid_to_base(clockid_t clock_id)
87{
88 return hrtimer_clock_to_base_table[clock_id];
89}
90
91
80/* 92/*
81 * Get the coarse grained time at the softirq based on xtime and 93 * Get the coarse grained time at the softirq based on xtime and
82 * wall_to_monotonic. 94 * wall_to_monotonic.
83 */ 95 */
84static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) 96static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
85{ 97{
86 ktime_t xtim, tomono; 98 ktime_t xtim, mono, boot;
87 struct timespec xts, tom; 99 struct timespec xts, tom, slp;
88 unsigned long seq;
89 100
90 do { 101 get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
91 seq = read_seqbegin(&xtime_lock);
92 xts = __current_kernel_time();
93 tom = __get_wall_to_monotonic();
94 } while (read_seqretry(&xtime_lock, seq));
95 102
96 xtim = timespec_to_ktime(xts); 103 xtim = timespec_to_ktime(xts);
97 tomono = timespec_to_ktime(tom); 104 mono = ktime_add(xtim, timespec_to_ktime(tom));
98 base->clock_base[CLOCK_REALTIME].softirq_time = xtim; 105 boot = ktime_add(mono, timespec_to_ktime(slp));
99 base->clock_base[CLOCK_MONOTONIC].softirq_time = 106 base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
100 ktime_add(xtim, tomono); 107 base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
108 base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
101} 109}
102 110
103/* 111/*
@@ -184,10 +192,11 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
184 struct hrtimer_cpu_base *new_cpu_base; 192 struct hrtimer_cpu_base *new_cpu_base;
185 int this_cpu = smp_processor_id(); 193 int this_cpu = smp_processor_id();
186 int cpu = hrtimer_get_target(this_cpu, pinned); 194 int cpu = hrtimer_get_target(this_cpu, pinned);
195 int basenum = hrtimer_clockid_to_base(base->index);
187 196
188again: 197again:
189 new_cpu_base = &per_cpu(hrtimer_bases, cpu); 198 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
190 new_base = &new_cpu_base->clock_base[base->index]; 199 new_base = &new_cpu_base->clock_base[basenum];
191 200
192 if (base != new_base) { 201 if (base != new_base) {
193 /* 202 /*
@@ -334,6 +343,11 @@ EXPORT_SYMBOL_GPL(ktime_add_safe);
334 343
335static struct debug_obj_descr hrtimer_debug_descr; 344static struct debug_obj_descr hrtimer_debug_descr;
336 345
346static void *hrtimer_debug_hint(void *addr)
347{
348 return ((struct hrtimer *) addr)->function;
349}
350
337/* 351/*
338 * fixup_init is called when: 352 * fixup_init is called when:
339 * - an active object is initialized 353 * - an active object is initialized
@@ -393,6 +407,7 @@ static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
393 407
394static struct debug_obj_descr hrtimer_debug_descr = { 408static struct debug_obj_descr hrtimer_debug_descr = {
395 .name = "hrtimer", 409 .name = "hrtimer",
410 .debug_hint = hrtimer_debug_hint,
396 .fixup_init = hrtimer_fixup_init, 411 .fixup_init = hrtimer_fixup_init,
397 .fixup_activate = hrtimer_fixup_activate, 412 .fixup_activate = hrtimer_fixup_activate,
398 .fixup_free = hrtimer_fixup_free, 413 .fixup_free = hrtimer_fixup_free,
@@ -611,24 +626,23 @@ static int hrtimer_reprogram(struct hrtimer *timer,
611static void retrigger_next_event(void *arg) 626static void retrigger_next_event(void *arg)
612{ 627{
613 struct hrtimer_cpu_base *base; 628 struct hrtimer_cpu_base *base;
614 struct timespec realtime_offset, wtm; 629 struct timespec realtime_offset, wtm, sleep;
615 unsigned long seq;
616 630
617 if (!hrtimer_hres_active()) 631 if (!hrtimer_hres_active())
618 return; 632 return;
619 633
620 do { 634 get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm,
621 seq = read_seqbegin(&xtime_lock); 635 &sleep);
622 wtm = __get_wall_to_monotonic();
623 } while (read_seqretry(&xtime_lock, seq));
624 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); 636 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
625 637
626 base = &__get_cpu_var(hrtimer_bases); 638 base = &__get_cpu_var(hrtimer_bases);
627 639
628 /* Adjust CLOCK_REALTIME offset */ 640 /* Adjust CLOCK_REALTIME offset */
629 raw_spin_lock(&base->lock); 641 raw_spin_lock(&base->lock);
630 base->clock_base[CLOCK_REALTIME].offset = 642 base->clock_base[HRTIMER_BASE_REALTIME].offset =
631 timespec_to_ktime(realtime_offset); 643 timespec_to_ktime(realtime_offset);
644 base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
645 timespec_to_ktime(sleep);
632 646
633 hrtimer_force_reprogram(base, 0); 647 hrtimer_force_reprogram(base, 0);
634 raw_spin_unlock(&base->lock); 648 raw_spin_unlock(&base->lock);
@@ -673,14 +687,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
673} 687}
674 688
675/* 689/*
676 * Initialize the high resolution related parts of a hrtimer
677 */
678static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
679{
680}
681
682
683/*
684 * When High resolution timers are active, try to reprogram. Note, that in case 690 * When High resolution timers are active, try to reprogram. Note, that in case
685 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry 691 * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
686 * check happens. The timer gets enqueued into the rbtree. The reprogramming 692 * check happens. The timer gets enqueued into the rbtree. The reprogramming
@@ -725,8 +731,9 @@ static int hrtimer_switch_to_hres(void)
725 return 0; 731 return 0;
726 } 732 }
727 base->hres_active = 1; 733 base->hres_active = 1;
728 base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES; 734 base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES;
729 base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES; 735 base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES;
736 base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES;
730 737
731 tick_setup_sched_timer(); 738 tick_setup_sched_timer();
732 739
@@ -750,7 +757,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
750 return 0; 757 return 0;
751} 758}
752static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } 759static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
753static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
754 760
755#endif /* CONFIG_HIGH_RES_TIMERS */ 761#endif /* CONFIG_HIGH_RES_TIMERS */
756 762
@@ -1121,6 +1127,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1121 enum hrtimer_mode mode) 1127 enum hrtimer_mode mode)
1122{ 1128{
1123 struct hrtimer_cpu_base *cpu_base; 1129 struct hrtimer_cpu_base *cpu_base;
1130 int base;
1124 1131
1125 memset(timer, 0, sizeof(struct hrtimer)); 1132 memset(timer, 0, sizeof(struct hrtimer));
1126 1133
@@ -1129,8 +1136,8 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1129 if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) 1136 if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
1130 clock_id = CLOCK_MONOTONIC; 1137 clock_id = CLOCK_MONOTONIC;
1131 1138
1132 timer->base = &cpu_base->clock_base[clock_id]; 1139 base = hrtimer_clockid_to_base(clock_id);
1133 hrtimer_init_timer_hres(timer); 1140 timer->base = &cpu_base->clock_base[base];
1134 timerqueue_init(&timer->node); 1141 timerqueue_init(&timer->node);
1135 1142
1136#ifdef CONFIG_TIMER_STATS 1143#ifdef CONFIG_TIMER_STATS
@@ -1165,9 +1172,10 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
1165int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) 1172int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
1166{ 1173{
1167 struct hrtimer_cpu_base *cpu_base; 1174 struct hrtimer_cpu_base *cpu_base;
1175 int base = hrtimer_clockid_to_base(which_clock);
1168 1176
1169 cpu_base = &__raw_get_cpu_var(hrtimer_bases); 1177 cpu_base = &__raw_get_cpu_var(hrtimer_bases);
1170 *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution); 1178 *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
1171 1179
1172 return 0; 1180 return 0;
1173} 1181}
@@ -1714,6 +1722,10 @@ static struct notifier_block __cpuinitdata hrtimers_nb = {
1714 1722
1715void __init hrtimers_init(void) 1723void __init hrtimers_init(void)
1716{ 1724{
1725 hrtimer_clock_to_base_table[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME;
1726 hrtimer_clock_to_base_table[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC;
1727 hrtimer_clock_to_base_table[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME;
1728
1717 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, 1729 hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
1718 (void *)(long)smp_processor_id()); 1730 (void *)(long)smp_processor_id());
1719 register_cpu_notifier(&hrtimers_nb); 1731 register_cpu_notifier(&hrtimers_nb);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 8e42fec7686d..09bef82d74cb 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -1,5 +1,6 @@
1# Select this to activate the generic irq options below
1config HAVE_GENERIC_HARDIRQS 2config HAVE_GENERIC_HARDIRQS
2 def_bool n 3 bool
3 4
4if HAVE_GENERIC_HARDIRQS 5if HAVE_GENERIC_HARDIRQS
5menu "IRQ subsystem" 6menu "IRQ subsystem"
@@ -11,26 +12,44 @@ config GENERIC_HARDIRQS
11 12
12# Select this to disable the deprecated stuff 13# Select this to disable the deprecated stuff
13config GENERIC_HARDIRQS_NO_DEPRECATED 14config GENERIC_HARDIRQS_NO_DEPRECATED
14 def_bool n 15 bool
16
17config GENERIC_HARDIRQS_NO_COMPAT
18 bool
15 19
16# Options selectable by the architecture code 20# Options selectable by the architecture code
21
22# Make sparse irq Kconfig switch below available
17config HAVE_SPARSE_IRQ 23config HAVE_SPARSE_IRQ
18 def_bool n 24 bool
19 25
26# Enable the generic irq autoprobe mechanism
20config GENERIC_IRQ_PROBE 27config GENERIC_IRQ_PROBE
21 def_bool n 28 bool
29
30# Use the generic /proc/interrupts implementation
31config GENERIC_IRQ_SHOW
32 bool
22 33
34# Support for delayed migration from interrupt context
23config GENERIC_PENDING_IRQ 35config GENERIC_PENDING_IRQ
24 def_bool n 36 bool
25 37
38# Alpha specific irq affinity mechanism
26config AUTO_IRQ_AFFINITY 39config AUTO_IRQ_AFFINITY
27 def_bool n 40 bool
28
29config IRQ_PER_CPU
30 def_bool n
31 41
42# Tasklet based software resend for pending interrupts on enable_irq()
32config HARDIRQS_SW_RESEND 43config HARDIRQS_SW_RESEND
33 def_bool n 44 bool
45
46# Preflow handler support for fasteoi (sparc64)
47config IRQ_PREFLOW_FASTEOI
48 bool
49
50# Support forced irq threading
51config IRQ_FORCED_THREADING
52 bool
34 53
35config SPARSE_IRQ 54config SPARSE_IRQ
36 bool "Support sparse irq numbering" 55 bool "Support sparse irq numbering"
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 505798f86c36..394784c57060 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -17,7 +17,7 @@
17/* 17/*
18 * Autodetection depends on the fact that any interrupt that 18 * Autodetection depends on the fact that any interrupt that
19 * comes in on to an unassigned handler will get stuck with 19 * comes in on to an unassigned handler will get stuck with
20 * "IRQ_WAITING" cleared and the interrupt disabled. 20 * "IRQS_WAITING" cleared and the interrupt disabled.
21 */ 21 */
22static DEFINE_MUTEX(probing_active); 22static DEFINE_MUTEX(probing_active);
23 23
@@ -32,7 +32,6 @@ unsigned long probe_irq_on(void)
32{ 32{
33 struct irq_desc *desc; 33 struct irq_desc *desc;
34 unsigned long mask = 0; 34 unsigned long mask = 0;
35 unsigned int status;
36 int i; 35 int i;
37 36
38 /* 37 /*
@@ -46,13 +45,7 @@ unsigned long probe_irq_on(void)
46 */ 45 */
47 for_each_irq_desc_reverse(i, desc) { 46 for_each_irq_desc_reverse(i, desc) {
48 raw_spin_lock_irq(&desc->lock); 47 raw_spin_lock_irq(&desc->lock);
49 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 48 if (!desc->action && irq_settings_can_probe(desc)) {
50 /*
51 * An old-style architecture might still have
52 * the handle_bad_irq handler there:
53 */
54 compat_irq_chip_set_default_handler(desc);
55
56 /* 49 /*
57 * Some chips need to know about probing in 50 * Some chips need to know about probing in
58 * progress: 51 * progress:
@@ -60,7 +53,7 @@ unsigned long probe_irq_on(void)
60 if (desc->irq_data.chip->irq_set_type) 53 if (desc->irq_data.chip->irq_set_type)
61 desc->irq_data.chip->irq_set_type(&desc->irq_data, 54 desc->irq_data.chip->irq_set_type(&desc->irq_data,
62 IRQ_TYPE_PROBE); 55 IRQ_TYPE_PROBE);
63 desc->irq_data.chip->irq_startup(&desc->irq_data); 56 irq_startup(desc);
64 } 57 }
65 raw_spin_unlock_irq(&desc->lock); 58 raw_spin_unlock_irq(&desc->lock);
66 } 59 }
@@ -75,10 +68,12 @@ unsigned long probe_irq_on(void)
75 */ 68 */
76 for_each_irq_desc_reverse(i, desc) { 69 for_each_irq_desc_reverse(i, desc) {
77 raw_spin_lock_irq(&desc->lock); 70 raw_spin_lock_irq(&desc->lock);
78 if (!desc->action && !(desc->status & IRQ_NOPROBE)) { 71 if (!desc->action && irq_settings_can_probe(desc)) {
79 desc->status |= IRQ_AUTODETECT | IRQ_WAITING; 72 desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
80 if (desc->irq_data.chip->irq_startup(&desc->irq_data)) 73 if (irq_startup(desc)) {
81 desc->status |= IRQ_PENDING; 74 irq_compat_set_pending(desc);
75 desc->istate |= IRQS_PENDING;
76 }
82 } 77 }
83 raw_spin_unlock_irq(&desc->lock); 78 raw_spin_unlock_irq(&desc->lock);
84 } 79 }
@@ -93,13 +88,12 @@ unsigned long probe_irq_on(void)
93 */ 88 */
94 for_each_irq_desc(i, desc) { 89 for_each_irq_desc(i, desc) {
95 raw_spin_lock_irq(&desc->lock); 90 raw_spin_lock_irq(&desc->lock);
96 status = desc->status;
97 91
98 if (status & IRQ_AUTODETECT) { 92 if (desc->istate & IRQS_AUTODETECT) {
99 /* It triggered already - consider it spurious. */ 93 /* It triggered already - consider it spurious. */
100 if (!(status & IRQ_WAITING)) { 94 if (!(desc->istate & IRQS_WAITING)) {
101 desc->status = status & ~IRQ_AUTODETECT; 95 desc->istate &= ~IRQS_AUTODETECT;
102 desc->irq_data.chip->irq_shutdown(&desc->irq_data); 96 irq_shutdown(desc);
103 } else 97 } else
104 if (i < 32) 98 if (i < 32)
105 mask |= 1 << i; 99 mask |= 1 << i;
@@ -125,20 +119,18 @@ EXPORT_SYMBOL(probe_irq_on);
125 */ 119 */
126unsigned int probe_irq_mask(unsigned long val) 120unsigned int probe_irq_mask(unsigned long val)
127{ 121{
128 unsigned int status, mask = 0; 122 unsigned int mask = 0;
129 struct irq_desc *desc; 123 struct irq_desc *desc;
130 int i; 124 int i;
131 125
132 for_each_irq_desc(i, desc) { 126 for_each_irq_desc(i, desc) {
133 raw_spin_lock_irq(&desc->lock); 127 raw_spin_lock_irq(&desc->lock);
134 status = desc->status; 128 if (desc->istate & IRQS_AUTODETECT) {
135 129 if (i < 16 && !(desc->istate & IRQS_WAITING))
136 if (status & IRQ_AUTODETECT) {
137 if (i < 16 && !(status & IRQ_WAITING))
138 mask |= 1 << i; 130 mask |= 1 << i;
139 131
140 desc->status = status & ~IRQ_AUTODETECT; 132 desc->istate &= ~IRQS_AUTODETECT;
141 desc->irq_data.chip->irq_shutdown(&desc->irq_data); 133 irq_shutdown(desc);
142 } 134 }
143 raw_spin_unlock_irq(&desc->lock); 135 raw_spin_unlock_irq(&desc->lock);
144 } 136 }
@@ -169,20 +161,18 @@ int probe_irq_off(unsigned long val)
169{ 161{
170 int i, irq_found = 0, nr_of_irqs = 0; 162 int i, irq_found = 0, nr_of_irqs = 0;
171 struct irq_desc *desc; 163 struct irq_desc *desc;
172 unsigned int status;
173 164
174 for_each_irq_desc(i, desc) { 165 for_each_irq_desc(i, desc) {
175 raw_spin_lock_irq(&desc->lock); 166 raw_spin_lock_irq(&desc->lock);
176 status = desc->status;
177 167
178 if (status & IRQ_AUTODETECT) { 168 if (desc->istate & IRQS_AUTODETECT) {
179 if (!(status & IRQ_WAITING)) { 169 if (!(desc->istate & IRQS_WAITING)) {
180 if (!nr_of_irqs) 170 if (!nr_of_irqs)
181 irq_found = i; 171 irq_found = i;
182 nr_of_irqs++; 172 nr_of_irqs++;
183 } 173 }
184 desc->status = status & ~IRQ_AUTODETECT; 174 desc->istate &= ~IRQS_AUTODETECT;
185 desc->irq_data.chip->irq_shutdown(&desc->irq_data); 175 irq_shutdown(desc);
186 } 176 }
187 raw_spin_unlock_irq(&desc->lock); 177 raw_spin_unlock_irq(&desc->lock);
188 } 178 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index baa5c4acad83..c9c0601f0615 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -19,140 +19,110 @@
19#include "internals.h" 19#include "internals.h"
20 20
21/** 21/**
22 * set_irq_chip - set the irq chip for an irq 22 * irq_set_chip - set the irq chip for an irq
23 * @irq: irq number 23 * @irq: irq number
24 * @chip: pointer to irq chip description structure 24 * @chip: pointer to irq chip description structure
25 */ 25 */
26int set_irq_chip(unsigned int irq, struct irq_chip *chip) 26int irq_set_chip(unsigned int irq, struct irq_chip *chip)
27{ 27{
28 struct irq_desc *desc = irq_to_desc(irq);
29 unsigned long flags; 28 unsigned long flags;
29 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
30 30
31 if (!desc) { 31 if (!desc)
32 WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
33 return -EINVAL; 32 return -EINVAL;
34 }
35 33
36 if (!chip) 34 if (!chip)
37 chip = &no_irq_chip; 35 chip = &no_irq_chip;
38 36
39 raw_spin_lock_irqsave(&desc->lock, flags);
40 irq_chip_set_defaults(chip); 37 irq_chip_set_defaults(chip);
41 desc->irq_data.chip = chip; 38 desc->irq_data.chip = chip;
42 raw_spin_unlock_irqrestore(&desc->lock, flags); 39 irq_put_desc_unlock(desc, flags);
43
44 return 0; 40 return 0;
45} 41}
46EXPORT_SYMBOL(set_irq_chip); 42EXPORT_SYMBOL(irq_set_chip);
47 43
48/** 44/**
49 * set_irq_type - set the irq trigger type for an irq 45 * irq_set_type - set the irq trigger type for an irq
50 * @irq: irq number 46 * @irq: irq number
51 * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h 47 * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
52 */ 48 */
53int set_irq_type(unsigned int irq, unsigned int type) 49int irq_set_irq_type(unsigned int irq, unsigned int type)
54{ 50{
55 struct irq_desc *desc = irq_to_desc(irq);
56 unsigned long flags; 51 unsigned long flags;
57 int ret = -ENXIO; 52 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
53 int ret = 0;
58 54
59 if (!desc) { 55 if (!desc)
60 printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq); 56 return -EINVAL;
61 return -ENODEV;
62 }
63 57
64 type &= IRQ_TYPE_SENSE_MASK; 58 type &= IRQ_TYPE_SENSE_MASK;
65 if (type == IRQ_TYPE_NONE) 59 if (type != IRQ_TYPE_NONE)
66 return 0; 60 ret = __irq_set_trigger(desc, irq, type);
67 61 irq_put_desc_busunlock(desc, flags);
68 raw_spin_lock_irqsave(&desc->lock, flags);
69 ret = __irq_set_trigger(desc, irq, type);
70 raw_spin_unlock_irqrestore(&desc->lock, flags);
71 return ret; 62 return ret;
72} 63}
73EXPORT_SYMBOL(set_irq_type); 64EXPORT_SYMBOL(irq_set_irq_type);
74 65
75/** 66/**
76 * set_irq_data - set irq type data for an irq 67 * irq_set_handler_data - set irq handler data for an irq
77 * @irq: Interrupt number 68 * @irq: Interrupt number
78 * @data: Pointer to interrupt specific data 69 * @data: Pointer to interrupt specific data
79 * 70 *
80 * Set the hardware irq controller data for an irq 71 * Set the hardware irq controller data for an irq
81 */ 72 */
82int set_irq_data(unsigned int irq, void *data) 73int irq_set_handler_data(unsigned int irq, void *data)
83{ 74{
84 struct irq_desc *desc = irq_to_desc(irq);
85 unsigned long flags; 75 unsigned long flags;
76 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
86 77
87 if (!desc) { 78 if (!desc)
88 printk(KERN_ERR
89 "Trying to install controller data for IRQ%d\n", irq);
90 return -EINVAL; 79 return -EINVAL;
91 }
92
93 raw_spin_lock_irqsave(&desc->lock, flags);
94 desc->irq_data.handler_data = data; 80 desc->irq_data.handler_data = data;
95 raw_spin_unlock_irqrestore(&desc->lock, flags); 81 irq_put_desc_unlock(desc, flags);
96 return 0; 82 return 0;
97} 83}
98EXPORT_SYMBOL(set_irq_data); 84EXPORT_SYMBOL(irq_set_handler_data);
99 85
100/** 86/**
101 * set_irq_msi - set MSI descriptor data for an irq 87 * irq_set_msi_desc - set MSI descriptor data for an irq
102 * @irq: Interrupt number 88 * @irq: Interrupt number
103 * @entry: Pointer to MSI descriptor data 89 * @entry: Pointer to MSI descriptor data
104 * 90 *
105 * Set the MSI descriptor entry for an irq 91 * Set the MSI descriptor entry for an irq
106 */ 92 */
107int set_irq_msi(unsigned int irq, struct msi_desc *entry) 93int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
108{ 94{
109 struct irq_desc *desc = irq_to_desc(irq);
110 unsigned long flags; 95 unsigned long flags;
96 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
111 97
112 if (!desc) { 98 if (!desc)
113 printk(KERN_ERR
114 "Trying to install msi data for IRQ%d\n", irq);
115 return -EINVAL; 99 return -EINVAL;
116 }
117
118 raw_spin_lock_irqsave(&desc->lock, flags);
119 desc->irq_data.msi_desc = entry; 100 desc->irq_data.msi_desc = entry;
120 if (entry) 101 if (entry)
121 entry->irq = irq; 102 entry->irq = irq;
122 raw_spin_unlock_irqrestore(&desc->lock, flags); 103 irq_put_desc_unlock(desc, flags);
123 return 0; 104 return 0;
124} 105}
125 106
126/** 107/**
127 * set_irq_chip_data - set irq chip data for an irq 108 * irq_set_chip_data - set irq chip data for an irq
128 * @irq: Interrupt number 109 * @irq: Interrupt number
129 * @data: Pointer to chip specific data 110 * @data: Pointer to chip specific data
130 * 111 *
131 * Set the hardware irq chip data for an irq 112 * Set the hardware irq chip data for an irq
132 */ 113 */
133int set_irq_chip_data(unsigned int irq, void *data) 114int irq_set_chip_data(unsigned int irq, void *data)
134{ 115{
135 struct irq_desc *desc = irq_to_desc(irq);
136 unsigned long flags; 116 unsigned long flags;
117 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
137 118
138 if (!desc) { 119 if (!desc)
139 printk(KERN_ERR
140 "Trying to install chip data for IRQ%d\n", irq);
141 return -EINVAL;
142 }
143
144 if (!desc->irq_data.chip) {
145 printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
146 return -EINVAL; 120 return -EINVAL;
147 }
148
149 raw_spin_lock_irqsave(&desc->lock, flags);
150 desc->irq_data.chip_data = data; 121 desc->irq_data.chip_data = data;
151 raw_spin_unlock_irqrestore(&desc->lock, flags); 122 irq_put_desc_unlock(desc, flags);
152
153 return 0; 123 return 0;
154} 124}
155EXPORT_SYMBOL(set_irq_chip_data); 125EXPORT_SYMBOL(irq_set_chip_data);
156 126
157struct irq_data *irq_get_irq_data(unsigned int irq) 127struct irq_data *irq_get_irq_data(unsigned int irq)
158{ 128{
@@ -162,72 +132,75 @@ struct irq_data *irq_get_irq_data(unsigned int irq)
162} 132}
163EXPORT_SYMBOL_GPL(irq_get_irq_data); 133EXPORT_SYMBOL_GPL(irq_get_irq_data);
164 134
165/** 135static void irq_state_clr_disabled(struct irq_desc *desc)
166 * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq
167 *
168 * @irq: Interrupt number
169 * @nest: 0 to clear / 1 to set the IRQ_NESTED_THREAD flag
170 *
171 * The IRQ_NESTED_THREAD flag indicates that on
172 * request_threaded_irq() no separate interrupt thread should be
173 * created for the irq as the handler are called nested in the
174 * context of a demultiplexing interrupt handler thread.
175 */
176void set_irq_nested_thread(unsigned int irq, int nest)
177{ 136{
178 struct irq_desc *desc = irq_to_desc(irq); 137 desc->istate &= ~IRQS_DISABLED;
179 unsigned long flags; 138 irq_compat_clr_disabled(desc);
180
181 if (!desc)
182 return;
183
184 raw_spin_lock_irqsave(&desc->lock, flags);
185 if (nest)
186 desc->status |= IRQ_NESTED_THREAD;
187 else
188 desc->status &= ~IRQ_NESTED_THREAD;
189 raw_spin_unlock_irqrestore(&desc->lock, flags);
190} 139}
191EXPORT_SYMBOL_GPL(set_irq_nested_thread);
192 140
193/* 141static void irq_state_set_disabled(struct irq_desc *desc)
194 * default enable function
195 */
196static void default_enable(struct irq_data *data)
197{ 142{
198 struct irq_desc *desc = irq_data_to_desc(data); 143 desc->istate |= IRQS_DISABLED;
144 irq_compat_set_disabled(desc);
145}
199 146
200 desc->irq_data.chip->irq_unmask(&desc->irq_data); 147static void irq_state_clr_masked(struct irq_desc *desc)
201 desc->status &= ~IRQ_MASKED; 148{
149 desc->istate &= ~IRQS_MASKED;
150 irq_compat_clr_masked(desc);
202} 151}
203 152
204/* 153static void irq_state_set_masked(struct irq_desc *desc)
205 * default disable function
206 */
207static void default_disable(struct irq_data *data)
208{ 154{
155 desc->istate |= IRQS_MASKED;
156 irq_compat_set_masked(desc);
209} 157}
210 158
211/* 159int irq_startup(struct irq_desc *desc)
212 * default startup function
213 */
214static unsigned int default_startup(struct irq_data *data)
215{ 160{
216 struct irq_desc *desc = irq_data_to_desc(data); 161 irq_state_clr_disabled(desc);
162 desc->depth = 0;
163
164 if (desc->irq_data.chip->irq_startup) {
165 int ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
166 irq_state_clr_masked(desc);
167 return ret;
168 }
217 169
218 desc->irq_data.chip->irq_enable(data); 170 irq_enable(desc);
219 return 0; 171 return 0;
220} 172}
221 173
222/* 174void irq_shutdown(struct irq_desc *desc)
223 * default shutdown function
224 */
225static void default_shutdown(struct irq_data *data)
226{ 175{
227 struct irq_desc *desc = irq_data_to_desc(data); 176 irq_state_set_disabled(desc);
177 desc->depth = 1;
178 if (desc->irq_data.chip->irq_shutdown)
179 desc->irq_data.chip->irq_shutdown(&desc->irq_data);
180 if (desc->irq_data.chip->irq_disable)
181 desc->irq_data.chip->irq_disable(&desc->irq_data);
182 else
183 desc->irq_data.chip->irq_mask(&desc->irq_data);
184 irq_state_set_masked(desc);
185}
228 186
229 desc->irq_data.chip->irq_mask(&desc->irq_data); 187void irq_enable(struct irq_desc *desc)
230 desc->status |= IRQ_MASKED; 188{
189 irq_state_clr_disabled(desc);
190 if (desc->irq_data.chip->irq_enable)
191 desc->irq_data.chip->irq_enable(&desc->irq_data);
192 else
193 desc->irq_data.chip->irq_unmask(&desc->irq_data);
194 irq_state_clr_masked(desc);
195}
196
197void irq_disable(struct irq_desc *desc)
198{
199 irq_state_set_disabled(desc);
200 if (desc->irq_data.chip->irq_disable) {
201 desc->irq_data.chip->irq_disable(&desc->irq_data);
202 irq_state_set_masked(desc);
203 }
231} 204}
232 205
233#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED 206#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
@@ -315,10 +288,6 @@ static void compat_bus_sync_unlock(struct irq_data *data)
315void irq_chip_set_defaults(struct irq_chip *chip) 288void irq_chip_set_defaults(struct irq_chip *chip)
316{ 289{
317#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED 290#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
318 /*
319 * Compat fixup functions need to be before we set the
320 * defaults for enable/disable/startup/shutdown
321 */
322 if (chip->enable) 291 if (chip->enable)
323 chip->irq_enable = compat_irq_enable; 292 chip->irq_enable = compat_irq_enable;
324 if (chip->disable) 293 if (chip->disable)
@@ -327,33 +296,8 @@ void irq_chip_set_defaults(struct irq_chip *chip)
327 chip->irq_shutdown = compat_irq_shutdown; 296 chip->irq_shutdown = compat_irq_shutdown;
328 if (chip->startup) 297 if (chip->startup)
329 chip->irq_startup = compat_irq_startup; 298 chip->irq_startup = compat_irq_startup;
330#endif
331 /*
332 * The real defaults
333 */
334 if (!chip->irq_enable)
335 chip->irq_enable = default_enable;
336 if (!chip->irq_disable)
337 chip->irq_disable = default_disable;
338 if (!chip->irq_startup)
339 chip->irq_startup = default_startup;
340 /*
341 * We use chip->irq_disable, when the user provided its own. When
342 * we have default_disable set for chip->irq_disable, then we need
343 * to use default_shutdown, otherwise the irq line is not
344 * disabled on free_irq():
345 */
346 if (!chip->irq_shutdown)
347 chip->irq_shutdown = chip->irq_disable != default_disable ?
348 chip->irq_disable : default_shutdown;
349
350#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
351 if (!chip->end) 299 if (!chip->end)
352 chip->end = dummy_irq_chip.end; 300 chip->end = dummy_irq_chip.end;
353
354 /*
355 * Now fix up the remaining compat handlers
356 */
357 if (chip->bus_lock) 301 if (chip->bus_lock)
358 chip->irq_bus_lock = compat_bus_lock; 302 chip->irq_bus_lock = compat_bus_lock;
359 if (chip->bus_sync_unlock) 303 if (chip->bus_sync_unlock)
@@ -388,22 +332,22 @@ static inline void mask_ack_irq(struct irq_desc *desc)
388 if (desc->irq_data.chip->irq_ack) 332 if (desc->irq_data.chip->irq_ack)
389 desc->irq_data.chip->irq_ack(&desc->irq_data); 333 desc->irq_data.chip->irq_ack(&desc->irq_data);
390 } 334 }
391 desc->status |= IRQ_MASKED; 335 irq_state_set_masked(desc);
392} 336}
393 337
394static inline void mask_irq(struct irq_desc *desc) 338void mask_irq(struct irq_desc *desc)
395{ 339{
396 if (desc->irq_data.chip->irq_mask) { 340 if (desc->irq_data.chip->irq_mask) {
397 desc->irq_data.chip->irq_mask(&desc->irq_data); 341 desc->irq_data.chip->irq_mask(&desc->irq_data);
398 desc->status |= IRQ_MASKED; 342 irq_state_set_masked(desc);
399 } 343 }
400} 344}
401 345
402static inline void unmask_irq(struct irq_desc *desc) 346void unmask_irq(struct irq_desc *desc)
403{ 347{
404 if (desc->irq_data.chip->irq_unmask) { 348 if (desc->irq_data.chip->irq_unmask) {
405 desc->irq_data.chip->irq_unmask(&desc->irq_data); 349 desc->irq_data.chip->irq_unmask(&desc->irq_data);
406 desc->status &= ~IRQ_MASKED; 350 irq_state_clr_masked(desc);
407 } 351 }
408} 352}
409 353
@@ -428,10 +372,11 @@ void handle_nested_irq(unsigned int irq)
428 kstat_incr_irqs_this_cpu(irq, desc); 372 kstat_incr_irqs_this_cpu(irq, desc);
429 373
430 action = desc->action; 374 action = desc->action;
431 if (unlikely(!action || (desc->status & IRQ_DISABLED))) 375 if (unlikely(!action || (desc->istate & IRQS_DISABLED)))
432 goto out_unlock; 376 goto out_unlock;
433 377
434 desc->status |= IRQ_INPROGRESS; 378 irq_compat_set_progress(desc);
379 desc->istate |= IRQS_INPROGRESS;
435 raw_spin_unlock_irq(&desc->lock); 380 raw_spin_unlock_irq(&desc->lock);
436 381
437 action_ret = action->thread_fn(action->irq, action->dev_id); 382 action_ret = action->thread_fn(action->irq, action->dev_id);
@@ -439,13 +384,21 @@ void handle_nested_irq(unsigned int irq)
439 note_interrupt(irq, desc, action_ret); 384 note_interrupt(irq, desc, action_ret);
440 385
441 raw_spin_lock_irq(&desc->lock); 386 raw_spin_lock_irq(&desc->lock);
442 desc->status &= ~IRQ_INPROGRESS; 387 desc->istate &= ~IRQS_INPROGRESS;
388 irq_compat_clr_progress(desc);
443 389
444out_unlock: 390out_unlock:
445 raw_spin_unlock_irq(&desc->lock); 391 raw_spin_unlock_irq(&desc->lock);
446} 392}
447EXPORT_SYMBOL_GPL(handle_nested_irq); 393EXPORT_SYMBOL_GPL(handle_nested_irq);
448 394
395static bool irq_check_poll(struct irq_desc *desc)
396{
397 if (!(desc->istate & IRQS_POLL_INPROGRESS))
398 return false;
399 return irq_wait_for_poll(desc);
400}
401
449/** 402/**
450 * handle_simple_irq - Simple and software-decoded IRQs. 403 * handle_simple_irq - Simple and software-decoded IRQs.
451 * @irq: the interrupt number 404 * @irq: the interrupt number
@@ -461,29 +414,20 @@ EXPORT_SYMBOL_GPL(handle_nested_irq);
461void 414void
462handle_simple_irq(unsigned int irq, struct irq_desc *desc) 415handle_simple_irq(unsigned int irq, struct irq_desc *desc)
463{ 416{
464 struct irqaction *action;
465 irqreturn_t action_ret;
466
467 raw_spin_lock(&desc->lock); 417 raw_spin_lock(&desc->lock);
468 418
469 if (unlikely(desc->status & IRQ_INPROGRESS)) 419 if (unlikely(desc->istate & IRQS_INPROGRESS))
470 goto out_unlock; 420 if (!irq_check_poll(desc))
471 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 421 goto out_unlock;
422
423 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
472 kstat_incr_irqs_this_cpu(irq, desc); 424 kstat_incr_irqs_this_cpu(irq, desc);
473 425
474 action = desc->action; 426 if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
475 if (unlikely(!action || (desc->status & IRQ_DISABLED)))
476 goto out_unlock; 427 goto out_unlock;
477 428
478 desc->status |= IRQ_INPROGRESS; 429 handle_irq_event(desc);
479 raw_spin_unlock(&desc->lock);
480 430
481 action_ret = handle_IRQ_event(irq, action);
482 if (!noirqdebug)
483 note_interrupt(irq, desc, action_ret);
484
485 raw_spin_lock(&desc->lock);
486 desc->status &= ~IRQ_INPROGRESS;
487out_unlock: 431out_unlock:
488 raw_spin_unlock(&desc->lock); 432 raw_spin_unlock(&desc->lock);
489} 433}
@@ -501,42 +445,42 @@ out_unlock:
501void 445void
502handle_level_irq(unsigned int irq, struct irq_desc *desc) 446handle_level_irq(unsigned int irq, struct irq_desc *desc)
503{ 447{
504 struct irqaction *action;
505 irqreturn_t action_ret;
506
507 raw_spin_lock(&desc->lock); 448 raw_spin_lock(&desc->lock);
508 mask_ack_irq(desc); 449 mask_ack_irq(desc);
509 450
510 if (unlikely(desc->status & IRQ_INPROGRESS)) 451 if (unlikely(desc->istate & IRQS_INPROGRESS))
511 goto out_unlock; 452 if (!irq_check_poll(desc))
512 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 453 goto out_unlock;
454
455 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
513 kstat_incr_irqs_this_cpu(irq, desc); 456 kstat_incr_irqs_this_cpu(irq, desc);
514 457
515 /* 458 /*
516 * If its disabled or no action available 459 * If its disabled or no action available
517 * keep it masked and get out of here 460 * keep it masked and get out of here
518 */ 461 */
519 action = desc->action; 462 if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
520 if (unlikely(!action || (desc->status & IRQ_DISABLED)))
521 goto out_unlock; 463 goto out_unlock;
522 464
523 desc->status |= IRQ_INPROGRESS; 465 handle_irq_event(desc);
524 raw_spin_unlock(&desc->lock);
525
526 action_ret = handle_IRQ_event(irq, action);
527 if (!noirqdebug)
528 note_interrupt(irq, desc, action_ret);
529 466
530 raw_spin_lock(&desc->lock); 467 if (!(desc->istate & (IRQS_DISABLED | IRQS_ONESHOT)))
531 desc->status &= ~IRQ_INPROGRESS;
532
533 if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT)))
534 unmask_irq(desc); 468 unmask_irq(desc);
535out_unlock: 469out_unlock:
536 raw_spin_unlock(&desc->lock); 470 raw_spin_unlock(&desc->lock);
537} 471}
538EXPORT_SYMBOL_GPL(handle_level_irq); 472EXPORT_SYMBOL_GPL(handle_level_irq);
539 473
474#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
475static inline void preflow_handler(struct irq_desc *desc)
476{
477 if (desc->preflow_handler)
478 desc->preflow_handler(&desc->irq_data);
479}
480#else
481static inline void preflow_handler(struct irq_desc *desc) { }
482#endif
483
540/** 484/**
541 * handle_fasteoi_irq - irq handler for transparent controllers 485 * handle_fasteoi_irq - irq handler for transparent controllers
542 * @irq: the interrupt number 486 * @irq: the interrupt number
@@ -550,42 +494,41 @@ EXPORT_SYMBOL_GPL(handle_level_irq);
550void 494void
551handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) 495handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
552{ 496{
553 struct irqaction *action;
554 irqreturn_t action_ret;
555
556 raw_spin_lock(&desc->lock); 497 raw_spin_lock(&desc->lock);
557 498
558 if (unlikely(desc->status & IRQ_INPROGRESS)) 499 if (unlikely(desc->istate & IRQS_INPROGRESS))
559 goto out; 500 if (!irq_check_poll(desc))
501 goto out;
560 502
561 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 503 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
562 kstat_incr_irqs_this_cpu(irq, desc); 504 kstat_incr_irqs_this_cpu(irq, desc);
563 505
564 /* 506 /*
565 * If its disabled or no action available 507 * If its disabled or no action available
566 * then mask it and get out of here: 508 * then mask it and get out of here:
567 */ 509 */
568 action = desc->action; 510 if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) {
569 if (unlikely(!action || (desc->status & IRQ_DISABLED))) { 511 irq_compat_set_pending(desc);
570 desc->status |= IRQ_PENDING; 512 desc->istate |= IRQS_PENDING;
571 mask_irq(desc); 513 mask_irq(desc);
572 goto out; 514 goto out;
573 } 515 }
574 516
575 desc->status |= IRQ_INPROGRESS; 517 if (desc->istate & IRQS_ONESHOT)
576 desc->status &= ~IRQ_PENDING; 518 mask_irq(desc);
577 raw_spin_unlock(&desc->lock);
578 519
579 action_ret = handle_IRQ_event(irq, action); 520 preflow_handler(desc);
580 if (!noirqdebug) 521 handle_irq_event(desc);
581 note_interrupt(irq, desc, action_ret);
582 522
583 raw_spin_lock(&desc->lock); 523out_eoi:
584 desc->status &= ~IRQ_INPROGRESS;
585out:
586 desc->irq_data.chip->irq_eoi(&desc->irq_data); 524 desc->irq_data.chip->irq_eoi(&desc->irq_data);
587 525out_unlock:
588 raw_spin_unlock(&desc->lock); 526 raw_spin_unlock(&desc->lock);
527 return;
528out:
529 if (!(desc->irq_data.chip->flags & IRQCHIP_EOI_IF_HANDLED))
530 goto out_eoi;
531 goto out_unlock;
589} 532}
590 533
591/** 534/**
@@ -609,32 +552,28 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
609{ 552{
610 raw_spin_lock(&desc->lock); 553 raw_spin_lock(&desc->lock);
611 554
612 desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); 555 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
613
614 /* 556 /*
615 * If we're currently running this IRQ, or its disabled, 557 * If we're currently running this IRQ, or its disabled,
616 * we shouldn't process the IRQ. Mark it pending, handle 558 * we shouldn't process the IRQ. Mark it pending, handle
617 * the necessary masking and go out 559 * the necessary masking and go out
618 */ 560 */
619 if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) || 561 if (unlikely((desc->istate & (IRQS_DISABLED | IRQS_INPROGRESS) ||
620 !desc->action)) { 562 !desc->action))) {
621 desc->status |= (IRQ_PENDING | IRQ_MASKED); 563 if (!irq_check_poll(desc)) {
622 mask_ack_irq(desc); 564 irq_compat_set_pending(desc);
623 goto out_unlock; 565 desc->istate |= IRQS_PENDING;
566 mask_ack_irq(desc);
567 goto out_unlock;
568 }
624 } 569 }
625 kstat_incr_irqs_this_cpu(irq, desc); 570 kstat_incr_irqs_this_cpu(irq, desc);
626 571
627 /* Start handling the irq */ 572 /* Start handling the irq */
628 desc->irq_data.chip->irq_ack(&desc->irq_data); 573 desc->irq_data.chip->irq_ack(&desc->irq_data);
629 574
630 /* Mark the IRQ currently in progress.*/
631 desc->status |= IRQ_INPROGRESS;
632
633 do { 575 do {
634 struct irqaction *action = desc->action; 576 if (unlikely(!desc->action)) {
635 irqreturn_t action_ret;
636
637 if (unlikely(!action)) {
638 mask_irq(desc); 577 mask_irq(desc);
639 goto out_unlock; 578 goto out_unlock;
640 } 579 }
@@ -644,22 +583,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
644 * one, we could have masked the irq. 583 * one, we could have masked the irq.
645 * Renable it, if it was not disabled in meantime. 584 * Renable it, if it was not disabled in meantime.
646 */ 585 */
647 if (unlikely((desc->status & 586 if (unlikely(desc->istate & IRQS_PENDING)) {
648 (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == 587 if (!(desc->istate & IRQS_DISABLED) &&
649 (IRQ_PENDING | IRQ_MASKED))) { 588 (desc->istate & IRQS_MASKED))
650 unmask_irq(desc); 589 unmask_irq(desc);
651 } 590 }
652 591
653 desc->status &= ~IRQ_PENDING; 592 handle_irq_event(desc);
654 raw_spin_unlock(&desc->lock);
655 action_ret = handle_IRQ_event(irq, action);
656 if (!noirqdebug)
657 note_interrupt(irq, desc, action_ret);
658 raw_spin_lock(&desc->lock);
659 593
660 } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING); 594 } while ((desc->istate & IRQS_PENDING) &&
595 !(desc->istate & IRQS_DISABLED));
661 596
662 desc->status &= ~IRQ_INPROGRESS;
663out_unlock: 597out_unlock:
664 raw_spin_unlock(&desc->lock); 598 raw_spin_unlock(&desc->lock);
665} 599}
@@ -674,103 +608,84 @@ out_unlock:
674void 608void
675handle_percpu_irq(unsigned int irq, struct irq_desc *desc) 609handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
676{ 610{
677 irqreturn_t action_ret; 611 struct irq_chip *chip = irq_desc_get_chip(desc);
678 612
679 kstat_incr_irqs_this_cpu(irq, desc); 613 kstat_incr_irqs_this_cpu(irq, desc);
680 614
681 if (desc->irq_data.chip->irq_ack) 615 if (chip->irq_ack)
682 desc->irq_data.chip->irq_ack(&desc->irq_data); 616 chip->irq_ack(&desc->irq_data);
683 617
684 action_ret = handle_IRQ_event(irq, desc->action); 618 handle_irq_event_percpu(desc, desc->action);
685 if (!noirqdebug)
686 note_interrupt(irq, desc, action_ret);
687 619
688 if (desc->irq_data.chip->irq_eoi) 620 if (chip->irq_eoi)
689 desc->irq_data.chip->irq_eoi(&desc->irq_data); 621 chip->irq_eoi(&desc->irq_data);
690} 622}
691 623
692void 624void
693__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, 625__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
694 const char *name) 626 const char *name)
695{ 627{
696 struct irq_desc *desc = irq_to_desc(irq);
697 unsigned long flags; 628 unsigned long flags;
629 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
698 630
699 if (!desc) { 631 if (!desc)
700 printk(KERN_ERR
701 "Trying to install type control for IRQ%d\n", irq);
702 return; 632 return;
703 }
704 633
705 if (!handle) 634 if (!handle) {
706 handle = handle_bad_irq; 635 handle = handle_bad_irq;
707 else if (desc->irq_data.chip == &no_irq_chip) { 636 } else {
708 printk(KERN_WARNING "Trying to install %sinterrupt handler " 637 if (WARN_ON(desc->irq_data.chip == &no_irq_chip))
709 "for IRQ%d\n", is_chained ? "chained " : "", irq); 638 goto out;
710 /*
711 * Some ARM implementations install a handler for really dumb
712 * interrupt hardware without setting an irq_chip. This worked
713 * with the ARM no_irq_chip but the check in setup_irq would
714 * prevent us to setup the interrupt at all. Switch it to
715 * dummy_irq_chip for easy transition.
716 */
717 desc->irq_data.chip = &dummy_irq_chip;
718 } 639 }
719 640
720 chip_bus_lock(desc);
721 raw_spin_lock_irqsave(&desc->lock, flags);
722
723 /* Uninstall? */ 641 /* Uninstall? */
724 if (handle == handle_bad_irq) { 642 if (handle == handle_bad_irq) {
725 if (desc->irq_data.chip != &no_irq_chip) 643 if (desc->irq_data.chip != &no_irq_chip)
726 mask_ack_irq(desc); 644 mask_ack_irq(desc);
727 desc->status |= IRQ_DISABLED; 645 irq_compat_set_disabled(desc);
646 desc->istate |= IRQS_DISABLED;
728 desc->depth = 1; 647 desc->depth = 1;
729 } 648 }
730 desc->handle_irq = handle; 649 desc->handle_irq = handle;
731 desc->name = name; 650 desc->name = name;
732 651
733 if (handle != handle_bad_irq && is_chained) { 652 if (handle != handle_bad_irq && is_chained) {
734 desc->status &= ~IRQ_DISABLED; 653 irq_settings_set_noprobe(desc);
735 desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; 654 irq_settings_set_norequest(desc);
736 desc->depth = 0; 655 irq_startup(desc);
737 desc->irq_data.chip->irq_startup(&desc->irq_data);
738 } 656 }
739 raw_spin_unlock_irqrestore(&desc->lock, flags); 657out:
740 chip_bus_sync_unlock(desc); 658 irq_put_desc_busunlock(desc, flags);
741}
742EXPORT_SYMBOL_GPL(__set_irq_handler);
743
744void
745set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
746 irq_flow_handler_t handle)
747{
748 set_irq_chip(irq, chip);
749 __set_irq_handler(irq, handle, 0, NULL);
750} 659}
660EXPORT_SYMBOL_GPL(__irq_set_handler);
751 661
752void 662void
753set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, 663irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
754 irq_flow_handler_t handle, const char *name) 664 irq_flow_handler_t handle, const char *name)
755{ 665{
756 set_irq_chip(irq, chip); 666 irq_set_chip(irq, chip);
757 __set_irq_handler(irq, handle, 0, name); 667 __irq_set_handler(irq, handle, 0, name);
758} 668}
759 669
760void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) 670void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
761{ 671{
762 struct irq_desc *desc = irq_to_desc(irq);
763 unsigned long flags; 672 unsigned long flags;
673 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
764 674
765 if (!desc) 675 if (!desc)
766 return; 676 return;
677 irq_settings_clr_and_set(desc, clr, set);
678
679 irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
680 IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT);
681 if (irq_settings_has_no_balance_set(desc))
682 irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
683 if (irq_settings_is_per_cpu(desc))
684 irqd_set(&desc->irq_data, IRQD_PER_CPU);
685 if (irq_settings_can_move_pcntxt(desc))
686 irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT);
767 687
768 /* Sanitize flags */ 688 irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
769 set &= IRQF_MODIFY_MASK;
770 clr &= IRQF_MODIFY_MASK;
771 689
772 raw_spin_lock_irqsave(&desc->lock, flags); 690 irq_put_desc_unlock(desc, flags);
773 desc->status &= ~clr;
774 desc->status |= set;
775 raw_spin_unlock_irqrestore(&desc->lock, flags);
776} 691}
diff --git a/kernel/irq/compat.h b/kernel/irq/compat.h
new file mode 100644
index 000000000000..6bbaf66aca85
--- /dev/null
+++ b/kernel/irq/compat.h
@@ -0,0 +1,72 @@
1/*
2 * Compat layer for transition period
3 */
4#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
5static inline void irq_compat_set_progress(struct irq_desc *desc)
6{
7 desc->status |= IRQ_INPROGRESS;
8}
9
10static inline void irq_compat_clr_progress(struct irq_desc *desc)
11{
12 desc->status &= ~IRQ_INPROGRESS;
13}
14static inline void irq_compat_set_disabled(struct irq_desc *desc)
15{
16 desc->status |= IRQ_DISABLED;
17}
18static inline void irq_compat_clr_disabled(struct irq_desc *desc)
19{
20 desc->status &= ~IRQ_DISABLED;
21}
22static inline void irq_compat_set_pending(struct irq_desc *desc)
23{
24 desc->status |= IRQ_PENDING;
25}
26
27static inline void irq_compat_clr_pending(struct irq_desc *desc)
28{
29 desc->status &= ~IRQ_PENDING;
30}
31static inline void irq_compat_set_masked(struct irq_desc *desc)
32{
33 desc->status |= IRQ_MASKED;
34}
35
36static inline void irq_compat_clr_masked(struct irq_desc *desc)
37{
38 desc->status &= ~IRQ_MASKED;
39}
40static inline void irq_compat_set_move_pending(struct irq_desc *desc)
41{
42 desc->status |= IRQ_MOVE_PENDING;
43}
44
45static inline void irq_compat_clr_move_pending(struct irq_desc *desc)
46{
47 desc->status &= ~IRQ_MOVE_PENDING;
48}
49static inline void irq_compat_set_affinity(struct irq_desc *desc)
50{
51 desc->status |= IRQ_AFFINITY_SET;
52}
53
54static inline void irq_compat_clr_affinity(struct irq_desc *desc)
55{
56 desc->status &= ~IRQ_AFFINITY_SET;
57}
58#else
59static inline void irq_compat_set_progress(struct irq_desc *desc) { }
60static inline void irq_compat_clr_progress(struct irq_desc *desc) { }
61static inline void irq_compat_set_disabled(struct irq_desc *desc) { }
62static inline void irq_compat_clr_disabled(struct irq_desc *desc) { }
63static inline void irq_compat_set_pending(struct irq_desc *desc) { }
64static inline void irq_compat_clr_pending(struct irq_desc *desc) { }
65static inline void irq_compat_set_masked(struct irq_desc *desc) { }
66static inline void irq_compat_clr_masked(struct irq_desc *desc) { }
67static inline void irq_compat_set_move_pending(struct irq_desc *desc) { }
68static inline void irq_compat_clr_move_pending(struct irq_desc *desc) { }
69static inline void irq_compat_set_affinity(struct irq_desc *desc) { }
70static inline void irq_compat_clr_affinity(struct irq_desc *desc) { }
71#endif
72
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
new file mode 100644
index 000000000000..d1a33b7fa61d
--- /dev/null
+++ b/kernel/irq/debug.h
@@ -0,0 +1,40 @@
1/*
2 * Debugging printout:
3 */
4
5#include <linux/kallsyms.h>
6
7#define P(f) if (desc->status & f) printk("%14s set\n", #f)
8#define PS(f) if (desc->istate & f) printk("%14s set\n", #f)
9
10static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
11{
12 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
13 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
14 printk("->handle_irq(): %p, ", desc->handle_irq);
15 print_symbol("%s\n", (unsigned long)desc->handle_irq);
16 printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
17 print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
18 printk("->action(): %p\n", desc->action);
19 if (desc->action) {
20 printk("->action->handler(): %p, ", desc->action->handler);
21 print_symbol("%s\n", (unsigned long)desc->action->handler);
22 }
23
24 P(IRQ_LEVEL);
25 P(IRQ_PER_CPU);
26 P(IRQ_NOPROBE);
27 P(IRQ_NOREQUEST);
28 P(IRQ_NOAUTOEN);
29
30 PS(IRQS_AUTODETECT);
31 PS(IRQS_INPROGRESS);
32 PS(IRQS_REPLAY);
33 PS(IRQS_WAITING);
34 PS(IRQS_DISABLED);
35 PS(IRQS_PENDING);
36 PS(IRQS_MASKED);
37}
38
39#undef P
40#undef PS
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3540a7190122..517561fc7317 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -51,30 +51,92 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
51 "but no thread function available.", irq, action->name); 51 "but no thread function available.", irq, action->name);
52} 52}
53 53
54/** 54static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
55 * handle_IRQ_event - irq action chain handler 55{
56 * @irq: the interrupt number 56 /*
57 * @action: the interrupt action chain for this irq 57 * Wake up the handler thread for this action. In case the
58 * 58 * thread crashed and was killed we just pretend that we
59 * Handles the action chain of an irq event 59 * handled the interrupt. The hardirq handler has disabled the
60 */ 60 * device interrupt, so no irq storm is lurking. If the
61irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) 61 * RUNTHREAD bit is already set, nothing to do.
62 */
63 if (test_bit(IRQTF_DIED, &action->thread_flags) ||
64 test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
65 return;
66
67 /*
68 * It's safe to OR the mask lockless here. We have only two
69 * places which write to threads_oneshot: This code and the
70 * irq thread.
71 *
72 * This code is the hard irq context and can never run on two
73 * cpus in parallel. If it ever does we have more serious
74 * problems than this bitmask.
75 *
76 * The irq threads of this irq which clear their "running" bit
77 * in threads_oneshot are serialized via desc->lock against
78 * each other and they are serialized against this code by
79 * IRQS_INPROGRESS.
80 *
81 * Hard irq handler:
82 *
83 * spin_lock(desc->lock);
84 * desc->state |= IRQS_INPROGRESS;
85 * spin_unlock(desc->lock);
86 * set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
87 * desc->threads_oneshot |= mask;
88 * spin_lock(desc->lock);
89 * desc->state &= ~IRQS_INPROGRESS;
90 * spin_unlock(desc->lock);
91 *
92 * irq thread:
93 *
94 * again:
95 * spin_lock(desc->lock);
96 * if (desc->state & IRQS_INPROGRESS) {
97 * spin_unlock(desc->lock);
98 * while(desc->state & IRQS_INPROGRESS)
99 * cpu_relax();
100 * goto again;
101 * }
102 * if (!test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
103 * desc->threads_oneshot &= ~mask;
104 * spin_unlock(desc->lock);
105 *
106 * So either the thread waits for us to clear IRQS_INPROGRESS
107 * or we are waiting in the flow handler for desc->lock to be
108 * released before we reach this point. The thread also checks
109 * IRQTF_RUNTHREAD under desc->lock. If set it leaves
110 * threads_oneshot untouched and runs the thread another time.
111 */
112 desc->threads_oneshot |= action->thread_mask;
113 wake_up_process(action->thread);
114}
115
116irqreturn_t
117handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
62{ 118{
63 irqreturn_t ret, retval = IRQ_NONE; 119 irqreturn_t retval = IRQ_NONE;
64 unsigned int status = 0; 120 unsigned int random = 0, irq = desc->irq_data.irq;
65 121
66 do { 122 do {
123 irqreturn_t res;
124
67 trace_irq_handler_entry(irq, action); 125 trace_irq_handler_entry(irq, action);
68 ret = action->handler(irq, action->dev_id); 126 res = action->handler(irq, action->dev_id);
69 trace_irq_handler_exit(irq, action, ret); 127 trace_irq_handler_exit(irq, action, res);
70 128
71 switch (ret) { 129 if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
130 irq, action->handler))
131 local_irq_disable();
132
133 switch (res) {
72 case IRQ_WAKE_THREAD: 134 case IRQ_WAKE_THREAD:
73 /* 135 /*
74 * Set result to handled so the spurious check 136 * Set result to handled so the spurious check
75 * does not trigger. 137 * does not trigger.
76 */ 138 */
77 ret = IRQ_HANDLED; 139 res = IRQ_HANDLED;
78 140
79 /* 141 /*
80 * Catch drivers which return WAKE_THREAD but 142 * Catch drivers which return WAKE_THREAD but
@@ -85,36 +147,56 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
85 break; 147 break;
86 } 148 }
87 149
88 /* 150 irq_wake_thread(desc, action);
89 * Wake up the handler thread for this
90 * action. In case the thread crashed and was
91 * killed we just pretend that we handled the
92 * interrupt. The hardirq handler above has
93 * disabled the device interrupt, so no irq
94 * storm is lurking.
95 */
96 if (likely(!test_bit(IRQTF_DIED,
97 &action->thread_flags))) {
98 set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
99 wake_up_process(action->thread);
100 }
101 151
102 /* Fall through to add to randomness */ 152 /* Fall through to add to randomness */
103 case IRQ_HANDLED: 153 case IRQ_HANDLED:
104 status |= action->flags; 154 random |= action->flags;
105 break; 155 break;
106 156
107 default: 157 default:
108 break; 158 break;
109 } 159 }
110 160
111 retval |= ret; 161 retval |= res;
112 action = action->next; 162 action = action->next;
113 } while (action); 163 } while (action);
114 164
115 if (status & IRQF_SAMPLE_RANDOM) 165 if (random & IRQF_SAMPLE_RANDOM)
116 add_interrupt_randomness(irq); 166 add_interrupt_randomness(irq);
117 local_irq_disable();
118 167
168 if (!noirqdebug)
169 note_interrupt(irq, desc, retval);
119 return retval; 170 return retval;
120} 171}
172
173irqreturn_t handle_irq_event(struct irq_desc *desc)
174{
175 struct irqaction *action = desc->action;
176 irqreturn_t ret;
177
178 irq_compat_clr_pending(desc);
179 desc->istate &= ~IRQS_PENDING;
180 irq_compat_set_progress(desc);
181 desc->istate |= IRQS_INPROGRESS;
182 raw_spin_unlock(&desc->lock);
183
184 ret = handle_irq_event_percpu(desc, action);
185
186 raw_spin_lock(&desc->lock);
187 desc->istate &= ~IRQS_INPROGRESS;
188 irq_compat_clr_progress(desc);
189 return ret;
190}
191
192/**
193 * handle_IRQ_event - irq action chain handler
194 * @irq: the interrupt number
195 * @action: the interrupt action chain for this irq
196 *
197 * Handles the action chain of an irq event
198 */
199irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
200{
201 return handle_irq_event_percpu(irq_to_desc(irq), action);
202}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4571ae7e085a..6c6ec9a49027 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -1,27 +1,101 @@
1/* 1/*
2 * IRQ subsystem internal functions and variables: 2 * IRQ subsystem internal functions and variables:
3 *
4 * Do not ever include this file from anything else than
5 * kernel/irq/. Do not even think about using any information outside
6 * of this file for your non core code.
3 */ 7 */
4#include <linux/irqdesc.h> 8#include <linux/irqdesc.h>
5 9
10#ifdef CONFIG_SPARSE_IRQ
11# define IRQ_BITMAP_BITS (NR_IRQS + 8196)
12#else
13# define IRQ_BITMAP_BITS NR_IRQS
14#endif
15
16#define istate core_internal_state__do_not_mess_with_it
17
18#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
19# define status status_use_accessors
20#endif
21
6extern int noirqdebug; 22extern int noirqdebug;
7 23
24/*
25 * Bits used by threaded handlers:
26 * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
27 * IRQTF_DIED - handler thread died
28 * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
29 * IRQTF_AFFINITY - irq thread is requested to adjust affinity
30 * IRQTF_FORCED_THREAD - irq action is force threaded
31 */
32enum {
33 IRQTF_RUNTHREAD,
34 IRQTF_DIED,
35 IRQTF_WARNED,
36 IRQTF_AFFINITY,
37 IRQTF_FORCED_THREAD,
38};
39
40/*
41 * Bit masks for desc->state
42 *
43 * IRQS_AUTODETECT - autodetection in progress
44 * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt
45 * detection
46 * IRQS_POLL_INPROGRESS - polling in progress
47 * IRQS_INPROGRESS - Interrupt in progress
48 * IRQS_ONESHOT - irq is not unmasked in primary handler
49 * IRQS_REPLAY - irq is replayed
50 * IRQS_WAITING - irq is waiting
51 * IRQS_DISABLED - irq is disabled
52 * IRQS_PENDING - irq is pending and replayed later
53 * IRQS_MASKED - irq is masked
54 * IRQS_SUSPENDED - irq is suspended
55 */
56enum {
57 IRQS_AUTODETECT = 0x00000001,
58 IRQS_SPURIOUS_DISABLED = 0x00000002,
59 IRQS_POLL_INPROGRESS = 0x00000008,
60 IRQS_INPROGRESS = 0x00000010,
61 IRQS_ONESHOT = 0x00000020,
62 IRQS_REPLAY = 0x00000040,
63 IRQS_WAITING = 0x00000080,
64 IRQS_DISABLED = 0x00000100,
65 IRQS_PENDING = 0x00000200,
66 IRQS_MASKED = 0x00000400,
67 IRQS_SUSPENDED = 0x00000800,
68};
69
70#include "compat.h"
71#include "debug.h"
72#include "settings.h"
73
8#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) 74#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data)
9 75
10/* Set default functions for irq_chip structures: */ 76/* Set default functions for irq_chip structures: */
11extern void irq_chip_set_defaults(struct irq_chip *chip); 77extern void irq_chip_set_defaults(struct irq_chip *chip);
12 78
13/* Set default handler: */
14extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
15
16extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, 79extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
17 unsigned long flags); 80 unsigned long flags);
18extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); 81extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
19extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); 82extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
20 83
84extern int irq_startup(struct irq_desc *desc);
85extern void irq_shutdown(struct irq_desc *desc);
86extern void irq_enable(struct irq_desc *desc);
87extern void irq_disable(struct irq_desc *desc);
88extern void mask_irq(struct irq_desc *desc);
89extern void unmask_irq(struct irq_desc *desc);
90
21extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); 91extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
22 92
93irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action);
94irqreturn_t handle_irq_event(struct irq_desc *desc);
95
23/* Resending of interrupts :*/ 96/* Resending of interrupts :*/
24void check_irq_resend(struct irq_desc *desc, unsigned int irq); 97void check_irq_resend(struct irq_desc *desc, unsigned int irq);
98bool irq_wait_for_poll(struct irq_desc *desc);
25 99
26#ifdef CONFIG_PROC_FS 100#ifdef CONFIG_PROC_FS
27extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); 101extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
@@ -37,20 +111,10 @@ static inline void unregister_handler_proc(unsigned int irq,
37 struct irqaction *action) { } 111 struct irqaction *action) { }
38#endif 112#endif
39 113
40extern int irq_select_affinity_usr(unsigned int irq); 114extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
41 115
42extern void irq_set_thread_affinity(struct irq_desc *desc); 116extern void irq_set_thread_affinity(struct irq_desc *desc);
43 117
44#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
45static inline void irq_end(unsigned int irq, struct irq_desc *desc)
46{
47 if (desc->irq_data.chip && desc->irq_data.chip->end)
48 desc->irq_data.chip->end(irq);
49}
50#else
51static inline void irq_end(unsigned int irq, struct irq_desc *desc) { }
52#endif
53
54/* Inline functions for support of irq chips on slow busses */ 118/* Inline functions for support of irq chips on slow busses */
55static inline void chip_bus_lock(struct irq_desc *desc) 119static inline void chip_bus_lock(struct irq_desc *desc)
56{ 120{
@@ -64,43 +128,60 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc)
64 desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data); 128 desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data);
65} 129}
66 130
131struct irq_desc *
132__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus);
133void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus);
134
135static inline struct irq_desc *
136irq_get_desc_buslock(unsigned int irq, unsigned long *flags)
137{
138 return __irq_get_desc_lock(irq, flags, true);
139}
140
141static inline void
142irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags)
143{
144 __irq_put_desc_unlock(desc, flags, true);
145}
146
147static inline struct irq_desc *
148irq_get_desc_lock(unsigned int irq, unsigned long *flags)
149{
150 return __irq_get_desc_lock(irq, flags, false);
151}
152
153static inline void
154irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
155{
156 __irq_put_desc_unlock(desc, flags, false);
157}
158
67/* 159/*
68 * Debugging printout: 160 * Manipulation functions for irq_data.state
69 */ 161 */
162static inline void irqd_set_move_pending(struct irq_data *d)
163{
164 d->state_use_accessors |= IRQD_SETAFFINITY_PENDING;
165 irq_compat_set_move_pending(irq_data_to_desc(d));
166}
70 167
71#include <linux/kallsyms.h> 168static inline void irqd_clr_move_pending(struct irq_data *d)
72 169{
73#define P(f) if (desc->status & f) printk("%14s set\n", #f) 170 d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING;
171 irq_compat_clr_move_pending(irq_data_to_desc(d));
172}
74 173
75static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) 174static inline void irqd_clear(struct irq_data *d, unsigned int mask)
76{ 175{
77 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", 176 d->state_use_accessors &= ~mask;
78 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
79 printk("->handle_irq(): %p, ", desc->handle_irq);
80 print_symbol("%s\n", (unsigned long)desc->handle_irq);
81 printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
82 print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
83 printk("->action(): %p\n", desc->action);
84 if (desc->action) {
85 printk("->action->handler(): %p, ", desc->action->handler);
86 print_symbol("%s\n", (unsigned long)desc->action->handler);
87 }
88
89 P(IRQ_INPROGRESS);
90 P(IRQ_DISABLED);
91 P(IRQ_PENDING);
92 P(IRQ_REPLAY);
93 P(IRQ_AUTODETECT);
94 P(IRQ_WAITING);
95 P(IRQ_LEVEL);
96 P(IRQ_MASKED);
97#ifdef CONFIG_IRQ_PER_CPU
98 P(IRQ_PER_CPU);
99#endif
100 P(IRQ_NOPROBE);
101 P(IRQ_NOREQUEST);
102 P(IRQ_NOAUTOEN);
103} 177}
104 178
105#undef P 179static inline void irqd_set(struct irq_data *d, unsigned int mask)
180{
181 d->state_use_accessors |= mask;
182}
106 183
184static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
185{
186 return d->state_use_accessors & mask;
187}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 282f20230e67..dbccc799407f 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -79,7 +79,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
79 desc->irq_data.chip_data = NULL; 79 desc->irq_data.chip_data = NULL;
80 desc->irq_data.handler_data = NULL; 80 desc->irq_data.handler_data = NULL;
81 desc->irq_data.msi_desc = NULL; 81 desc->irq_data.msi_desc = NULL;
82 desc->status = IRQ_DEFAULT_INIT_FLAGS; 82 irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
83 desc->istate = IRQS_DISABLED;
83 desc->handle_irq = handle_bad_irq; 84 desc->handle_irq = handle_bad_irq;
84 desc->depth = 1; 85 desc->depth = 1;
85 desc->irq_count = 0; 86 desc->irq_count = 0;
@@ -94,7 +95,7 @@ int nr_irqs = NR_IRQS;
94EXPORT_SYMBOL_GPL(nr_irqs); 95EXPORT_SYMBOL_GPL(nr_irqs);
95 96
96static DEFINE_MUTEX(sparse_irq_lock); 97static DEFINE_MUTEX(sparse_irq_lock);
97static DECLARE_BITMAP(allocated_irqs, NR_IRQS); 98static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
98 99
99#ifdef CONFIG_SPARSE_IRQ 100#ifdef CONFIG_SPARSE_IRQ
100 101
@@ -206,6 +207,14 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
206 return NULL; 207 return NULL;
207} 208}
208 209
210static int irq_expand_nr_irqs(unsigned int nr)
211{
212 if (nr > IRQ_BITMAP_BITS)
213 return -ENOMEM;
214 nr_irqs = nr;
215 return 0;
216}
217
209int __init early_irq_init(void) 218int __init early_irq_init(void)
210{ 219{
211 int i, initcnt, node = first_online_node; 220 int i, initcnt, node = first_online_node;
@@ -217,6 +226,15 @@ int __init early_irq_init(void)
217 initcnt = arch_probe_nr_irqs(); 226 initcnt = arch_probe_nr_irqs();
218 printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt); 227 printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt);
219 228
229 if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
230 nr_irqs = IRQ_BITMAP_BITS;
231
232 if (WARN_ON(initcnt > IRQ_BITMAP_BITS))
233 initcnt = IRQ_BITMAP_BITS;
234
235 if (initcnt > nr_irqs)
236 nr_irqs = initcnt;
237
220 for (i = 0; i < initcnt; i++) { 238 for (i = 0; i < initcnt; i++) {
221 desc = alloc_desc(i, node); 239 desc = alloc_desc(i, node);
222 set_bit(i, allocated_irqs); 240 set_bit(i, allocated_irqs);
@@ -229,7 +247,7 @@ int __init early_irq_init(void)
229 247
230struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { 248struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
231 [0 ... NR_IRQS-1] = { 249 [0 ... NR_IRQS-1] = {
232 .status = IRQ_DEFAULT_INIT_FLAGS, 250 .istate = IRQS_DISABLED,
233 .handle_irq = handle_bad_irq, 251 .handle_irq = handle_bad_irq,
234 .depth = 1, 252 .depth = 1,
235 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), 253 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
@@ -251,8 +269,8 @@ int __init early_irq_init(void)
251 for (i = 0; i < count; i++) { 269 for (i = 0; i < count; i++) {
252 desc[i].irq_data.irq = i; 270 desc[i].irq_data.irq = i;
253 desc[i].irq_data.chip = &no_irq_chip; 271 desc[i].irq_data.chip = &no_irq_chip;
254 /* TODO : do this allocation on-demand ... */
255 desc[i].kstat_irqs = alloc_percpu(unsigned int); 272 desc[i].kstat_irqs = alloc_percpu(unsigned int);
273 irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
256 alloc_masks(desc + i, GFP_KERNEL, node); 274 alloc_masks(desc + i, GFP_KERNEL, node);
257 desc_smp_init(desc + i, node); 275 desc_smp_init(desc + i, node);
258 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 276 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
@@ -277,24 +295,14 @@ static void free_desc(unsigned int irq)
277 295
278static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) 296static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
279{ 297{
280#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
281 struct irq_desc *desc;
282 unsigned int i;
283
284 for (i = 0; i < cnt; i++) {
285 desc = irq_to_desc(start + i);
286 if (desc && !desc->kstat_irqs) {
287 unsigned int __percpu *stats = alloc_percpu(unsigned int);
288
289 if (!stats)
290 return -1;
291 if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
292 free_percpu(stats);
293 }
294 }
295#endif
296 return start; 298 return start;
297} 299}
300
301static int irq_expand_nr_irqs(unsigned int nr)
302{
303 return -ENOMEM;
304}
305
298#endif /* !CONFIG_SPARSE_IRQ */ 306#endif /* !CONFIG_SPARSE_IRQ */
299 307
300/* Dynamic interrupt handling */ 308/* Dynamic interrupt handling */
@@ -338,14 +346,17 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
338 346
339 mutex_lock(&sparse_irq_lock); 347 mutex_lock(&sparse_irq_lock);
340 348
341 start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0); 349 start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
350 from, cnt, 0);
342 ret = -EEXIST; 351 ret = -EEXIST;
343 if (irq >=0 && start != irq) 352 if (irq >=0 && start != irq)
344 goto err; 353 goto err;
345 354
346 ret = -ENOMEM; 355 if (start + cnt > nr_irqs) {
347 if (start >= nr_irqs) 356 ret = irq_expand_nr_irqs(start + cnt);
348 goto err; 357 if (ret)
358 goto err;
359 }
349 360
350 bitmap_set(allocated_irqs, start, cnt); 361 bitmap_set(allocated_irqs, start, cnt);
351 mutex_unlock(&sparse_irq_lock); 362 mutex_unlock(&sparse_irq_lock);
@@ -392,6 +403,26 @@ unsigned int irq_get_next_irq(unsigned int offset)
392 return find_next_bit(allocated_irqs, nr_irqs, offset); 403 return find_next_bit(allocated_irqs, nr_irqs, offset);
393} 404}
394 405
406struct irq_desc *
407__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus)
408{
409 struct irq_desc *desc = irq_to_desc(irq);
410
411 if (desc) {
412 if (bus)
413 chip_bus_lock(desc);
414 raw_spin_lock_irqsave(&desc->lock, *flags);
415 }
416 return desc;
417}
418
419void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus)
420{
421 raw_spin_unlock_irqrestore(&desc->lock, flags);
422 if (bus)
423 chip_bus_sync_unlock(desc);
424}
425
395/** 426/**
396 * dynamic_irq_cleanup - cleanup a dynamically allocated irq 427 * dynamic_irq_cleanup - cleanup a dynamically allocated irq
397 * @irq: irq number to initialize 428 * @irq: irq number to initialize
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0caa59f747dd..acd599a43bfb 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -17,6 +17,17 @@
17 17
18#include "internals.h" 18#include "internals.h"
19 19
20#ifdef CONFIG_IRQ_FORCED_THREADING
21__read_mostly bool force_irqthreads;
22
23static int __init setup_forced_irqthreads(char *arg)
24{
25 force_irqthreads = true;
26 return 0;
27}
28early_param("threadirqs", setup_forced_irqthreads);
29#endif
30
20/** 31/**
21 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 32 * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
22 * @irq: interrupt number to wait for 33 * @irq: interrupt number to wait for
@@ -30,7 +41,7 @@
30void synchronize_irq(unsigned int irq) 41void synchronize_irq(unsigned int irq)
31{ 42{
32 struct irq_desc *desc = irq_to_desc(irq); 43 struct irq_desc *desc = irq_to_desc(irq);
33 unsigned int status; 44 unsigned int state;
34 45
35 if (!desc) 46 if (!desc)
36 return; 47 return;
@@ -42,16 +53,16 @@ void synchronize_irq(unsigned int irq)
42 * Wait until we're out of the critical section. This might 53 * Wait until we're out of the critical section. This might
43 * give the wrong answer due to the lack of memory barriers. 54 * give the wrong answer due to the lack of memory barriers.
44 */ 55 */
45 while (desc->status & IRQ_INPROGRESS) 56 while (desc->istate & IRQS_INPROGRESS)
46 cpu_relax(); 57 cpu_relax();
47 58
48 /* Ok, that indicated we're done: double-check carefully. */ 59 /* Ok, that indicated we're done: double-check carefully. */
49 raw_spin_lock_irqsave(&desc->lock, flags); 60 raw_spin_lock_irqsave(&desc->lock, flags);
50 status = desc->status; 61 state = desc->istate;
51 raw_spin_unlock_irqrestore(&desc->lock, flags); 62 raw_spin_unlock_irqrestore(&desc->lock, flags);
52 63
53 /* Oops, that failed? */ 64 /* Oops, that failed? */
54 } while (status & IRQ_INPROGRESS); 65 } while (state & IRQS_INPROGRESS);
55 66
56 /* 67 /*
57 * We made sure that no hardirq handler is running. Now verify 68 * We made sure that no hardirq handler is running. Now verify
@@ -73,8 +84,8 @@ int irq_can_set_affinity(unsigned int irq)
73{ 84{
74 struct irq_desc *desc = irq_to_desc(irq); 85 struct irq_desc *desc = irq_to_desc(irq);
75 86
76 if (CHECK_IRQ_PER_CPU(desc->status) || !desc->irq_data.chip || 87 if (!desc || !irqd_can_balance(&desc->irq_data) ||
77 !desc->irq_data.chip->irq_set_affinity) 88 !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity)
78 return 0; 89 return 0;
79 90
80 return 1; 91 return 1;
@@ -100,67 +111,169 @@ void irq_set_thread_affinity(struct irq_desc *desc)
100 } 111 }
101} 112}
102 113
114#ifdef CONFIG_GENERIC_PENDING_IRQ
115static inline bool irq_can_move_pcntxt(struct irq_desc *desc)
116{
117 return irq_settings_can_move_pcntxt(desc);
118}
119static inline bool irq_move_pending(struct irq_desc *desc)
120{
121 return irqd_is_setaffinity_pending(&desc->irq_data);
122}
123static inline void
124irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
125{
126 cpumask_copy(desc->pending_mask, mask);
127}
128static inline void
129irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
130{
131 cpumask_copy(mask, desc->pending_mask);
132}
133#else
134static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; }
135static inline bool irq_move_pending(struct irq_desc *desc) { return false; }
136static inline void
137irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
138static inline void
139irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
140#endif
141
103/** 142/**
104 * irq_set_affinity - Set the irq affinity of a given irq 143 * irq_set_affinity - Set the irq affinity of a given irq
105 * @irq: Interrupt to set affinity 144 * @irq: Interrupt to set affinity
106 * @cpumask: cpumask 145 * @cpumask: cpumask
107 * 146 *
108 */ 147 */
109int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) 148int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
110{ 149{
111 struct irq_desc *desc = irq_to_desc(irq); 150 struct irq_desc *desc = irq_to_desc(irq);
112 struct irq_chip *chip = desc->irq_data.chip; 151 struct irq_chip *chip = desc->irq_data.chip;
113 unsigned long flags; 152 unsigned long flags;
153 int ret = 0;
114 154
115 if (!chip->irq_set_affinity) 155 if (!chip->irq_set_affinity)
116 return -EINVAL; 156 return -EINVAL;
117 157
118 raw_spin_lock_irqsave(&desc->lock, flags); 158 raw_spin_lock_irqsave(&desc->lock, flags);
119 159
120#ifdef CONFIG_GENERIC_PENDING_IRQ 160 if (irq_can_move_pcntxt(desc)) {
121 if (desc->status & IRQ_MOVE_PCNTXT) { 161 ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
122 if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) { 162 switch (ret) {
123 cpumask_copy(desc->irq_data.affinity, cpumask); 163 case IRQ_SET_MASK_OK:
164 cpumask_copy(desc->irq_data.affinity, mask);
165 case IRQ_SET_MASK_OK_NOCOPY:
124 irq_set_thread_affinity(desc); 166 irq_set_thread_affinity(desc);
167 ret = 0;
125 } 168 }
169 } else {
170 irqd_set_move_pending(&desc->irq_data);
171 irq_copy_pending(desc, mask);
126 } 172 }
127 else { 173
128 desc->status |= IRQ_MOVE_PENDING; 174 if (desc->affinity_notify) {
129 cpumask_copy(desc->pending_mask, cpumask); 175 kref_get(&desc->affinity_notify->kref);
130 } 176 schedule_work(&desc->affinity_notify->work);
131#else
132 if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) {
133 cpumask_copy(desc->irq_data.affinity, cpumask);
134 irq_set_thread_affinity(desc);
135 } 177 }
136#endif 178 irq_compat_set_affinity(desc);
137 desc->status |= IRQ_AFFINITY_SET; 179 irqd_set(&desc->irq_data, IRQD_AFFINITY_SET);
138 raw_spin_unlock_irqrestore(&desc->lock, flags); 180 raw_spin_unlock_irqrestore(&desc->lock, flags);
139 return 0; 181 return ret;
140} 182}
141 183
142int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) 184int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
143{ 185{
186 unsigned long flags;
187 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
188
189 if (!desc)
190 return -EINVAL;
191 desc->affinity_hint = m;
192 irq_put_desc_unlock(desc, flags);
193 return 0;
194}
195EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
196
197static void irq_affinity_notify(struct work_struct *work)
198{
199 struct irq_affinity_notify *notify =
200 container_of(work, struct irq_affinity_notify, work);
201 struct irq_desc *desc = irq_to_desc(notify->irq);
202 cpumask_var_t cpumask;
203 unsigned long flags;
204
205 if (!desc || !alloc_cpumask_var(&cpumask, GFP_KERNEL))
206 goto out;
207
208 raw_spin_lock_irqsave(&desc->lock, flags);
209 if (irq_move_pending(desc))
210 irq_get_pending(cpumask, desc);
211 else
212 cpumask_copy(cpumask, desc->irq_data.affinity);
213 raw_spin_unlock_irqrestore(&desc->lock, flags);
214
215 notify->notify(notify, cpumask);
216
217 free_cpumask_var(cpumask);
218out:
219 kref_put(&notify->kref, notify->release);
220}
221
222/**
223 * irq_set_affinity_notifier - control notification of IRQ affinity changes
224 * @irq: Interrupt for which to enable/disable notification
225 * @notify: Context for notification, or %NULL to disable
226 * notification. Function pointers must be initialised;
227 * the other fields will be initialised by this function.
228 *
229 * Must be called in process context. Notification may only be enabled
230 * after the IRQ is allocated and must be disabled before the IRQ is
231 * freed using free_irq().
232 */
233int
234irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
235{
144 struct irq_desc *desc = irq_to_desc(irq); 236 struct irq_desc *desc = irq_to_desc(irq);
237 struct irq_affinity_notify *old_notify;
145 unsigned long flags; 238 unsigned long flags;
146 239
240 /* The release function is promised process context */
241 might_sleep();
242
147 if (!desc) 243 if (!desc)
148 return -EINVAL; 244 return -EINVAL;
149 245
246 /* Complete initialisation of *notify */
247 if (notify) {
248 notify->irq = irq;
249 kref_init(&notify->kref);
250 INIT_WORK(&notify->work, irq_affinity_notify);
251 }
252
150 raw_spin_lock_irqsave(&desc->lock, flags); 253 raw_spin_lock_irqsave(&desc->lock, flags);
151 desc->affinity_hint = m; 254 old_notify = desc->affinity_notify;
255 desc->affinity_notify = notify;
152 raw_spin_unlock_irqrestore(&desc->lock, flags); 256 raw_spin_unlock_irqrestore(&desc->lock, flags);
153 257
258 if (old_notify)
259 kref_put(&old_notify->kref, old_notify->release);
260
154 return 0; 261 return 0;
155} 262}
156EXPORT_SYMBOL_GPL(irq_set_affinity_hint); 263EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
157 264
158#ifndef CONFIG_AUTO_IRQ_AFFINITY 265#ifndef CONFIG_AUTO_IRQ_AFFINITY
159/* 266/*
160 * Generic version of the affinity autoselector. 267 * Generic version of the affinity autoselector.
161 */ 268 */
162static int setup_affinity(unsigned int irq, struct irq_desc *desc) 269static int
270setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
163{ 271{
272 struct irq_chip *chip = irq_desc_get_chip(desc);
273 struct cpumask *set = irq_default_affinity;
274 int ret;
275
276 /* Excludes PER_CPU and NO_BALANCE interrupts */
164 if (!irq_can_set_affinity(irq)) 277 if (!irq_can_set_affinity(irq))
165 return 0; 278 return 0;
166 279
@@ -168,22 +281,29 @@ static int setup_affinity(unsigned int irq, struct irq_desc *desc)
168 * Preserve an userspace affinity setup, but make sure that 281 * Preserve an userspace affinity setup, but make sure that
169 * one of the targets is online. 282 * one of the targets is online.
170 */ 283 */
171 if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { 284 if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) {
172 if (cpumask_any_and(desc->irq_data.affinity, cpu_online_mask) 285 if (cpumask_intersects(desc->irq_data.affinity,
173 < nr_cpu_ids) 286 cpu_online_mask))
174 goto set_affinity; 287 set = desc->irq_data.affinity;
175 else 288 else {
176 desc->status &= ~IRQ_AFFINITY_SET; 289 irq_compat_clr_affinity(desc);
290 irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
291 }
177 } 292 }
178 293
179 cpumask_and(desc->irq_data.affinity, cpu_online_mask, irq_default_affinity); 294 cpumask_and(mask, cpu_online_mask, set);
180set_affinity: 295 ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
181 desc->irq_data.chip->irq_set_affinity(&desc->irq_data, desc->irq_data.affinity, false); 296 switch (ret) {
182 297 case IRQ_SET_MASK_OK:
298 cpumask_copy(desc->irq_data.affinity, mask);
299 case IRQ_SET_MASK_OK_NOCOPY:
300 irq_set_thread_affinity(desc);
301 }
183 return 0; 302 return 0;
184} 303}
185#else 304#else
186static inline int setup_affinity(unsigned int irq, struct irq_desc *d) 305static inline int
306setup_affinity(unsigned int irq, struct irq_desc *d, struct cpumask *mask)
187{ 307{
188 return irq_select_affinity(irq); 308 return irq_select_affinity(irq);
189} 309}
@@ -192,23 +312,21 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
192/* 312/*
193 * Called when affinity is set via /proc/irq 313 * Called when affinity is set via /proc/irq
194 */ 314 */
195int irq_select_affinity_usr(unsigned int irq) 315int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask)
196{ 316{
197 struct irq_desc *desc = irq_to_desc(irq); 317 struct irq_desc *desc = irq_to_desc(irq);
198 unsigned long flags; 318 unsigned long flags;
199 int ret; 319 int ret;
200 320
201 raw_spin_lock_irqsave(&desc->lock, flags); 321 raw_spin_lock_irqsave(&desc->lock, flags);
202 ret = setup_affinity(irq, desc); 322 ret = setup_affinity(irq, desc, mask);
203 if (!ret)
204 irq_set_thread_affinity(desc);
205 raw_spin_unlock_irqrestore(&desc->lock, flags); 323 raw_spin_unlock_irqrestore(&desc->lock, flags);
206
207 return ret; 324 return ret;
208} 325}
209 326
210#else 327#else
211static inline int setup_affinity(unsigned int irq, struct irq_desc *desc) 328static inline int
329setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
212{ 330{
213 return 0; 331 return 0;
214} 332}
@@ -219,13 +337,23 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
219 if (suspend) { 337 if (suspend) {
220 if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)) 338 if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
221 return; 339 return;
222 desc->status |= IRQ_SUSPENDED; 340 desc->istate |= IRQS_SUSPENDED;
223 } 341 }
224 342
225 if (!desc->depth++) { 343 if (!desc->depth++)
226 desc->status |= IRQ_DISABLED; 344 irq_disable(desc);
227 desc->irq_data.chip->irq_disable(&desc->irq_data); 345}
228 } 346
347static int __disable_irq_nosync(unsigned int irq)
348{
349 unsigned long flags;
350 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
351
352 if (!desc)
353 return -EINVAL;
354 __disable_irq(desc, irq, false);
355 irq_put_desc_busunlock(desc, flags);
356 return 0;
229} 357}
230 358
231/** 359/**
@@ -241,17 +369,7 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
241 */ 369 */
242void disable_irq_nosync(unsigned int irq) 370void disable_irq_nosync(unsigned int irq)
243{ 371{
244 struct irq_desc *desc = irq_to_desc(irq); 372 __disable_irq_nosync(irq);
245 unsigned long flags;
246
247 if (!desc)
248 return;
249
250 chip_bus_lock(desc);
251 raw_spin_lock_irqsave(&desc->lock, flags);
252 __disable_irq(desc, irq, false);
253 raw_spin_unlock_irqrestore(&desc->lock, flags);
254 chip_bus_sync_unlock(desc);
255} 373}
256EXPORT_SYMBOL(disable_irq_nosync); 374EXPORT_SYMBOL(disable_irq_nosync);
257 375
@@ -269,21 +387,24 @@ EXPORT_SYMBOL(disable_irq_nosync);
269 */ 387 */
270void disable_irq(unsigned int irq) 388void disable_irq(unsigned int irq)
271{ 389{
272 struct irq_desc *desc = irq_to_desc(irq); 390 if (!__disable_irq_nosync(irq))
273
274 if (!desc)
275 return;
276
277 disable_irq_nosync(irq);
278 if (desc->action)
279 synchronize_irq(irq); 391 synchronize_irq(irq);
280} 392}
281EXPORT_SYMBOL(disable_irq); 393EXPORT_SYMBOL(disable_irq);
282 394
283void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) 395void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
284{ 396{
285 if (resume) 397 if (resume) {
286 desc->status &= ~IRQ_SUSPENDED; 398 if (!(desc->istate & IRQS_SUSPENDED)) {
399 if (!desc->action)
400 return;
401 if (!(desc->action->flags & IRQF_FORCE_RESUME))
402 return;
403 /* Pretend that it got disabled ! */
404 desc->depth++;
405 }
406 desc->istate &= ~IRQS_SUSPENDED;
407 }
287 408
288 switch (desc->depth) { 409 switch (desc->depth) {
289 case 0: 410 case 0:
@@ -291,12 +412,11 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
291 WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); 412 WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
292 break; 413 break;
293 case 1: { 414 case 1: {
294 unsigned int status = desc->status & ~IRQ_DISABLED; 415 if (desc->istate & IRQS_SUSPENDED)
295
296 if (desc->status & IRQ_SUSPENDED)
297 goto err_out; 416 goto err_out;
298 /* Prevent probing on this irq: */ 417 /* Prevent probing on this irq: */
299 desc->status = status | IRQ_NOPROBE; 418 irq_settings_set_noprobe(desc);
419 irq_enable(desc);
300 check_irq_resend(desc, irq); 420 check_irq_resend(desc, irq);
301 /* fall-through */ 421 /* fall-through */
302 } 422 }
@@ -318,21 +438,18 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
318 */ 438 */
319void enable_irq(unsigned int irq) 439void enable_irq(unsigned int irq)
320{ 440{
321 struct irq_desc *desc = irq_to_desc(irq);
322 unsigned long flags; 441 unsigned long flags;
442 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
323 443
324 if (!desc) 444 if (!desc)
325 return; 445 return;
446 if (WARN(!desc->irq_data.chip,
447 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
448 goto out;
326 449
327 if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable,
328 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
329 return;
330
331 chip_bus_lock(desc);
332 raw_spin_lock_irqsave(&desc->lock, flags);
333 __enable_irq(desc, irq, false); 450 __enable_irq(desc, irq, false);
334 raw_spin_unlock_irqrestore(&desc->lock, flags); 451out:
335 chip_bus_sync_unlock(desc); 452 irq_put_desc_busunlock(desc, flags);
336} 453}
337EXPORT_SYMBOL(enable_irq); 454EXPORT_SYMBOL(enable_irq);
338 455
@@ -348,7 +465,7 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
348} 465}
349 466
350/** 467/**
351 * set_irq_wake - control irq power management wakeup 468 * irq_set_irq_wake - control irq power management wakeup
352 * @irq: interrupt to control 469 * @irq: interrupt to control
353 * @on: enable/disable power management wakeup 470 * @on: enable/disable power management wakeup
354 * 471 *
@@ -359,23 +476,22 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
359 * Wakeup mode lets this IRQ wake the system from sleep 476 * Wakeup mode lets this IRQ wake the system from sleep
360 * states like "suspend to RAM". 477 * states like "suspend to RAM".
361 */ 478 */
362int set_irq_wake(unsigned int irq, unsigned int on) 479int irq_set_irq_wake(unsigned int irq, unsigned int on)
363{ 480{
364 struct irq_desc *desc = irq_to_desc(irq);
365 unsigned long flags; 481 unsigned long flags;
482 struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
366 int ret = 0; 483 int ret = 0;
367 484
368 /* wakeup-capable irqs can be shared between drivers that 485 /* wakeup-capable irqs can be shared between drivers that
369 * don't need to have the same sleep mode behaviors. 486 * don't need to have the same sleep mode behaviors.
370 */ 487 */
371 raw_spin_lock_irqsave(&desc->lock, flags);
372 if (on) { 488 if (on) {
373 if (desc->wake_depth++ == 0) { 489 if (desc->wake_depth++ == 0) {
374 ret = set_irq_wake_real(irq, on); 490 ret = set_irq_wake_real(irq, on);
375 if (ret) 491 if (ret)
376 desc->wake_depth = 0; 492 desc->wake_depth = 0;
377 else 493 else
378 desc->status |= IRQ_WAKEUP; 494 irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE);
379 } 495 }
380 } else { 496 } else {
381 if (desc->wake_depth == 0) { 497 if (desc->wake_depth == 0) {
@@ -385,14 +501,13 @@ int set_irq_wake(unsigned int irq, unsigned int on)
385 if (ret) 501 if (ret)
386 desc->wake_depth = 1; 502 desc->wake_depth = 1;
387 else 503 else
388 desc->status &= ~IRQ_WAKEUP; 504 irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
389 } 505 }
390 } 506 }
391 507 irq_put_desc_busunlock(desc, flags);
392 raw_spin_unlock_irqrestore(&desc->lock, flags);
393 return ret; 508 return ret;
394} 509}
395EXPORT_SYMBOL(set_irq_wake); 510EXPORT_SYMBOL(irq_set_irq_wake);
396 511
397/* 512/*
398 * Internal function that tells the architecture code whether a 513 * Internal function that tells the architecture code whether a
@@ -401,43 +516,27 @@ EXPORT_SYMBOL(set_irq_wake);
401 */ 516 */
402int can_request_irq(unsigned int irq, unsigned long irqflags) 517int can_request_irq(unsigned int irq, unsigned long irqflags)
403{ 518{
404 struct irq_desc *desc = irq_to_desc(irq);
405 struct irqaction *action;
406 unsigned long flags; 519 unsigned long flags;
520 struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
521 int canrequest = 0;
407 522
408 if (!desc) 523 if (!desc)
409 return 0; 524 return 0;
410 525
411 if (desc->status & IRQ_NOREQUEST) 526 if (irq_settings_can_request(desc)) {
412 return 0; 527 if (desc->action)
413 528 if (irqflags & desc->action->flags & IRQF_SHARED)
414 raw_spin_lock_irqsave(&desc->lock, flags); 529 canrequest =1;
415 action = desc->action; 530 }
416 if (action) 531 irq_put_desc_unlock(desc, flags);
417 if (irqflags & action->flags & IRQF_SHARED) 532 return canrequest;
418 action = NULL;
419
420 raw_spin_unlock_irqrestore(&desc->lock, flags);
421
422 return !action;
423}
424
425void compat_irq_chip_set_default_handler(struct irq_desc *desc)
426{
427 /*
428 * If the architecture still has not overriden
429 * the flow handler then zap the default. This
430 * should catch incorrect flow-type setting.
431 */
432 if (desc->handle_irq == &handle_bad_irq)
433 desc->handle_irq = NULL;
434} 533}
435 534
436int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, 535int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
437 unsigned long flags) 536 unsigned long flags)
438{ 537{
439 int ret;
440 struct irq_chip *chip = desc->irq_data.chip; 538 struct irq_chip *chip = desc->irq_data.chip;
539 int ret, unmask = 0;
441 540
442 if (!chip || !chip->irq_set_type) { 541 if (!chip || !chip->irq_set_type) {
443 /* 542 /*
@@ -449,23 +548,43 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
449 return 0; 548 return 0;
450 } 549 }
451 550
551 flags &= IRQ_TYPE_SENSE_MASK;
552
553 if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
554 if (!(desc->istate & IRQS_MASKED))
555 mask_irq(desc);
556 if (!(desc->istate & IRQS_DISABLED))
557 unmask = 1;
558 }
559
452 /* caller masked out all except trigger mode flags */ 560 /* caller masked out all except trigger mode flags */
453 ret = chip->irq_set_type(&desc->irq_data, flags); 561 ret = chip->irq_set_type(&desc->irq_data, flags);
454 562
455 if (ret) 563 switch (ret) {
456 pr_err("setting trigger mode %lu for irq %u failed (%pF)\n", 564 case IRQ_SET_MASK_OK:
457 flags, irq, chip->irq_set_type); 565 irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK);
458 else { 566 irqd_set(&desc->irq_data, flags);
459 if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) 567
460 flags |= IRQ_LEVEL; 568 case IRQ_SET_MASK_OK_NOCOPY:
461 /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */ 569 flags = irqd_get_trigger_type(&desc->irq_data);
462 desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK); 570 irq_settings_set_trigger_mask(desc, flags);
463 desc->status |= flags; 571 irqd_clear(&desc->irq_data, IRQD_LEVEL);
572 irq_settings_clr_level(desc);
573 if (flags & IRQ_TYPE_LEVEL_MASK) {
574 irq_settings_set_level(desc);
575 irqd_set(&desc->irq_data, IRQD_LEVEL);
576 }
464 577
465 if (chip != desc->irq_data.chip) 578 if (chip != desc->irq_data.chip)
466 irq_chip_set_defaults(desc->irq_data.chip); 579 irq_chip_set_defaults(desc->irq_data.chip);
580 ret = 0;
581 break;
582 default:
583 pr_err("setting trigger mode %lu for irq %u failed (%pF)\n",
584 flags, irq, chip->irq_set_type);
467 } 585 }
468 586 if (unmask)
587 unmask_irq(desc);
469 return ret; 588 return ret;
470} 589}
471 590
@@ -509,8 +628,11 @@ static int irq_wait_for_interrupt(struct irqaction *action)
509 * handler finished. unmask if the interrupt has not been disabled and 628 * handler finished. unmask if the interrupt has not been disabled and
510 * is marked MASKED. 629 * is marked MASKED.
511 */ 630 */
512static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) 631static void irq_finalize_oneshot(struct irq_desc *desc,
632 struct irqaction *action, bool force)
513{ 633{
634 if (!(desc->istate & IRQS_ONESHOT))
635 return;
514again: 636again:
515 chip_bus_lock(desc); 637 chip_bus_lock(desc);
516 raw_spin_lock_irq(&desc->lock); 638 raw_spin_lock_irq(&desc->lock);
@@ -522,26 +644,44 @@ again:
522 * The thread is faster done than the hard interrupt handler 644 * The thread is faster done than the hard interrupt handler
523 * on the other CPU. If we unmask the irq line then the 645 * on the other CPU. If we unmask the irq line then the
524 * interrupt can come in again and masks the line, leaves due 646 * interrupt can come in again and masks the line, leaves due
525 * to IRQ_INPROGRESS and the irq line is masked forever. 647 * to IRQS_INPROGRESS and the irq line is masked forever.
648 *
649 * This also serializes the state of shared oneshot handlers
650 * versus "desc->threads_onehsot |= action->thread_mask;" in
651 * irq_wake_thread(). See the comment there which explains the
652 * serialization.
526 */ 653 */
527 if (unlikely(desc->status & IRQ_INPROGRESS)) { 654 if (unlikely(desc->istate & IRQS_INPROGRESS)) {
528 raw_spin_unlock_irq(&desc->lock); 655 raw_spin_unlock_irq(&desc->lock);
529 chip_bus_sync_unlock(desc); 656 chip_bus_sync_unlock(desc);
530 cpu_relax(); 657 cpu_relax();
531 goto again; 658 goto again;
532 } 659 }
533 660
534 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { 661 /*
535 desc->status &= ~IRQ_MASKED; 662 * Now check again, whether the thread should run. Otherwise
663 * we would clear the threads_oneshot bit of this thread which
664 * was just set.
665 */
666 if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
667 goto out_unlock;
668
669 desc->threads_oneshot &= ~action->thread_mask;
670
671 if (!desc->threads_oneshot && !(desc->istate & IRQS_DISABLED) &&
672 (desc->istate & IRQS_MASKED)) {
673 irq_compat_clr_masked(desc);
674 desc->istate &= ~IRQS_MASKED;
536 desc->irq_data.chip->irq_unmask(&desc->irq_data); 675 desc->irq_data.chip->irq_unmask(&desc->irq_data);
537 } 676 }
677out_unlock:
538 raw_spin_unlock_irq(&desc->lock); 678 raw_spin_unlock_irq(&desc->lock);
539 chip_bus_sync_unlock(desc); 679 chip_bus_sync_unlock(desc);
540} 680}
541 681
542#ifdef CONFIG_SMP 682#ifdef CONFIG_SMP
543/* 683/*
544 * Check whether we need to change the affinity of the interrupt thread. 684 * Check whether we need to chasnge the affinity of the interrupt thread.
545 */ 685 */
546static void 686static void
547irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) 687irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
@@ -573,6 +713,32 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
573#endif 713#endif
574 714
575/* 715/*
716 * Interrupts which are not explicitely requested as threaded
717 * interrupts rely on the implicit bh/preempt disable of the hard irq
718 * context. So we need to disable bh here to avoid deadlocks and other
719 * side effects.
720 */
721static void
722irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
723{
724 local_bh_disable();
725 action->thread_fn(action->irq, action->dev_id);
726 irq_finalize_oneshot(desc, action, false);
727 local_bh_enable();
728}
729
730/*
731 * Interrupts explicitely requested as threaded interupts want to be
732 * preemtible - many of them need to sleep and wait for slow busses to
733 * complete.
734 */
735static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action)
736{
737 action->thread_fn(action->irq, action->dev_id);
738 irq_finalize_oneshot(desc, action, false);
739}
740
741/*
576 * Interrupt handler thread 742 * Interrupt handler thread
577 */ 743 */
578static int irq_thread(void *data) 744static int irq_thread(void *data)
@@ -582,7 +748,14 @@ static int irq_thread(void *data)
582 }; 748 };
583 struct irqaction *action = data; 749 struct irqaction *action = data;
584 struct irq_desc *desc = irq_to_desc(action->irq); 750 struct irq_desc *desc = irq_to_desc(action->irq);
585 int wake, oneshot = desc->status & IRQ_ONESHOT; 751 void (*handler_fn)(struct irq_desc *desc, struct irqaction *action);
752 int wake;
753
754 if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD,
755 &action->thread_flags))
756 handler_fn = irq_forced_thread_fn;
757 else
758 handler_fn = irq_thread_fn;
586 759
587 sched_setscheduler(current, SCHED_FIFO, &param); 760 sched_setscheduler(current, SCHED_FIFO, &param);
588 current->irqaction = action; 761 current->irqaction = action;
@@ -594,23 +767,20 @@ static int irq_thread(void *data)
594 atomic_inc(&desc->threads_active); 767 atomic_inc(&desc->threads_active);
595 768
596 raw_spin_lock_irq(&desc->lock); 769 raw_spin_lock_irq(&desc->lock);
597 if (unlikely(desc->status & IRQ_DISABLED)) { 770 if (unlikely(desc->istate & IRQS_DISABLED)) {
598 /* 771 /*
599 * CHECKME: We might need a dedicated 772 * CHECKME: We might need a dedicated
600 * IRQ_THREAD_PENDING flag here, which 773 * IRQ_THREAD_PENDING flag here, which
601 * retriggers the thread in check_irq_resend() 774 * retriggers the thread in check_irq_resend()
602 * but AFAICT IRQ_PENDING should be fine as it 775 * but AFAICT IRQS_PENDING should be fine as it
603 * retriggers the interrupt itself --- tglx 776 * retriggers the interrupt itself --- tglx
604 */ 777 */
605 desc->status |= IRQ_PENDING; 778 irq_compat_set_pending(desc);
779 desc->istate |= IRQS_PENDING;
606 raw_spin_unlock_irq(&desc->lock); 780 raw_spin_unlock_irq(&desc->lock);
607 } else { 781 } else {
608 raw_spin_unlock_irq(&desc->lock); 782 raw_spin_unlock_irq(&desc->lock);
609 783 handler_fn(desc, action);
610 action->thread_fn(action->irq, action->dev_id);
611
612 if (oneshot)
613 irq_finalize_oneshot(action->irq, desc);
614 } 784 }
615 785
616 wake = atomic_dec_and_test(&desc->threads_active); 786 wake = atomic_dec_and_test(&desc->threads_active);
@@ -619,6 +789,9 @@ static int irq_thread(void *data)
619 wake_up(&desc->wait_for_threads); 789 wake_up(&desc->wait_for_threads);
620 } 790 }
621 791
792 /* Prevent a stale desc->threads_oneshot */
793 irq_finalize_oneshot(desc, action, true);
794
622 /* 795 /*
623 * Clear irqaction. Otherwise exit_irq_thread() would make 796 * Clear irqaction. Otherwise exit_irq_thread() would make
624 * fuzz about an active irq thread going into nirvana. 797 * fuzz about an active irq thread going into nirvana.
@@ -633,6 +806,7 @@ static int irq_thread(void *data)
633void exit_irq_thread(void) 806void exit_irq_thread(void)
634{ 807{
635 struct task_struct *tsk = current; 808 struct task_struct *tsk = current;
809 struct irq_desc *desc;
636 810
637 if (!tsk->irqaction) 811 if (!tsk->irqaction)
638 return; 812 return;
@@ -641,6 +815,14 @@ void exit_irq_thread(void)
641 "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", 815 "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
642 tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq); 816 tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
643 817
818 desc = irq_to_desc(tsk->irqaction->irq);
819
820 /*
821 * Prevent a stale desc->threads_oneshot. Must be called
822 * before setting the IRQTF_DIED flag.
823 */
824 irq_finalize_oneshot(desc, tsk->irqaction, true);
825
644 /* 826 /*
645 * Set the THREAD DIED flag to prevent further wakeups of the 827 * Set the THREAD DIED flag to prevent further wakeups of the
646 * soon to be gone threaded handler. 828 * soon to be gone threaded handler.
@@ -648,6 +830,22 @@ void exit_irq_thread(void)
648 set_bit(IRQTF_DIED, &tsk->irqaction->flags); 830 set_bit(IRQTF_DIED, &tsk->irqaction->flags);
649} 831}
650 832
833static void irq_setup_forced_threading(struct irqaction *new)
834{
835 if (!force_irqthreads)
836 return;
837 if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
838 return;
839
840 new->flags |= IRQF_ONESHOT;
841
842 if (!new->thread_fn) {
843 set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
844 new->thread_fn = new->handler;
845 new->handler = irq_default_primary_handler;
846 }
847}
848
651/* 849/*
652 * Internal function to register an irqaction - typically used to 850 * Internal function to register an irqaction - typically used to
653 * allocate special interrupts that are part of the architecture. 851 * allocate special interrupts that are part of the architecture.
@@ -657,9 +855,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
657{ 855{
658 struct irqaction *old, **old_ptr; 856 struct irqaction *old, **old_ptr;
659 const char *old_name = NULL; 857 const char *old_name = NULL;
660 unsigned long flags; 858 unsigned long flags, thread_mask = 0;
661 int nested, shared = 0; 859 int ret, nested, shared = 0;
662 int ret; 860 cpumask_var_t mask;
663 861
664 if (!desc) 862 if (!desc)
665 return -EINVAL; 863 return -EINVAL;
@@ -683,15 +881,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
683 rand_initialize_irq(irq); 881 rand_initialize_irq(irq);
684 } 882 }
685 883
686 /* Oneshot interrupts are not allowed with shared */
687 if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED))
688 return -EINVAL;
689
690 /* 884 /*
691 * Check whether the interrupt nests into another interrupt 885 * Check whether the interrupt nests into another interrupt
692 * thread. 886 * thread.
693 */ 887 */
694 nested = desc->status & IRQ_NESTED_THREAD; 888 nested = irq_settings_is_nested_thread(desc);
695 if (nested) { 889 if (nested) {
696 if (!new->thread_fn) 890 if (!new->thread_fn)
697 return -EINVAL; 891 return -EINVAL;
@@ -701,6 +895,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
701 * dummy function which warns when called. 895 * dummy function which warns when called.
702 */ 896 */
703 new->handler = irq_nested_primary_handler; 897 new->handler = irq_nested_primary_handler;
898 } else {
899 irq_setup_forced_threading(new);
704 } 900 }
705 901
706 /* 902 /*
@@ -724,6 +920,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
724 new->thread = t; 920 new->thread = t;
725 } 921 }
726 922
923 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
924 ret = -ENOMEM;
925 goto out_thread;
926 }
927
727 /* 928 /*
728 * The following block of code has to be executed atomically 929 * The following block of code has to be executed atomically
729 */ 930 */
@@ -735,29 +936,40 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
735 * Can't share interrupts unless both agree to and are 936 * Can't share interrupts unless both agree to and are
736 * the same type (level, edge, polarity). So both flag 937 * the same type (level, edge, polarity). So both flag
737 * fields must have IRQF_SHARED set and the bits which 938 * fields must have IRQF_SHARED set and the bits which
738 * set the trigger type must match. 939 * set the trigger type must match. Also all must
940 * agree on ONESHOT.
739 */ 941 */
740 if (!((old->flags & new->flags) & IRQF_SHARED) || 942 if (!((old->flags & new->flags) & IRQF_SHARED) ||
741 ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) { 943 ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) ||
944 ((old->flags ^ new->flags) & IRQF_ONESHOT)) {
742 old_name = old->name; 945 old_name = old->name;
743 goto mismatch; 946 goto mismatch;
744 } 947 }
745 948
746#if defined(CONFIG_IRQ_PER_CPU)
747 /* All handlers must agree on per-cpuness */ 949 /* All handlers must agree on per-cpuness */
748 if ((old->flags & IRQF_PERCPU) != 950 if ((old->flags & IRQF_PERCPU) !=
749 (new->flags & IRQF_PERCPU)) 951 (new->flags & IRQF_PERCPU))
750 goto mismatch; 952 goto mismatch;
751#endif
752 953
753 /* add new interrupt at end of irq queue */ 954 /* add new interrupt at end of irq queue */
754 do { 955 do {
956 thread_mask |= old->thread_mask;
755 old_ptr = &old->next; 957 old_ptr = &old->next;
756 old = *old_ptr; 958 old = *old_ptr;
757 } while (old); 959 } while (old);
758 shared = 1; 960 shared = 1;
759 } 961 }
760 962
963 /*
964 * Setup the thread mask for this irqaction. Unlikely to have
965 * 32 resp 64 irqs sharing one line, but who knows.
966 */
967 if (new->flags & IRQF_ONESHOT && thread_mask == ~0UL) {
968 ret = -EBUSY;
969 goto out_mask;
970 }
971 new->thread_mask = 1 << ffz(thread_mask);
972
761 if (!shared) { 973 if (!shared) {
762 irq_chip_set_defaults(desc->irq_data.chip); 974 irq_chip_set_defaults(desc->irq_data.chip);
763 975
@@ -769,42 +981,44 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
769 new->flags & IRQF_TRIGGER_MASK); 981 new->flags & IRQF_TRIGGER_MASK);
770 982
771 if (ret) 983 if (ret)
772 goto out_thread; 984 goto out_mask;
773 } else 985 }
774 compat_irq_chip_set_default_handler(desc);
775#if defined(CONFIG_IRQ_PER_CPU)
776 if (new->flags & IRQF_PERCPU)
777 desc->status |= IRQ_PER_CPU;
778#endif
779 986
780 desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | IRQ_ONESHOT | 987 desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
781 IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED); 988 IRQS_INPROGRESS | IRQS_ONESHOT | \
989 IRQS_WAITING);
990
991 if (new->flags & IRQF_PERCPU) {
992 irqd_set(&desc->irq_data, IRQD_PER_CPU);
993 irq_settings_set_per_cpu(desc);
994 }
782 995
783 if (new->flags & IRQF_ONESHOT) 996 if (new->flags & IRQF_ONESHOT)
784 desc->status |= IRQ_ONESHOT; 997 desc->istate |= IRQS_ONESHOT;
785 998
786 if (!(desc->status & IRQ_NOAUTOEN)) { 999 if (irq_settings_can_autoenable(desc))
787 desc->depth = 0; 1000 irq_startup(desc);
788 desc->status &= ~IRQ_DISABLED; 1001 else
789 desc->irq_data.chip->irq_startup(&desc->irq_data);
790 } else
791 /* Undo nested disables: */ 1002 /* Undo nested disables: */
792 desc->depth = 1; 1003 desc->depth = 1;
793 1004
794 /* Exclude IRQ from balancing if requested */ 1005 /* Exclude IRQ from balancing if requested */
795 if (new->flags & IRQF_NOBALANCING) 1006 if (new->flags & IRQF_NOBALANCING) {
796 desc->status |= IRQ_NO_BALANCING; 1007 irq_settings_set_no_balancing(desc);
1008 irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
1009 }
797 1010
798 /* Set default affinity mask once everything is setup */ 1011 /* Set default affinity mask once everything is setup */
799 setup_affinity(irq, desc); 1012 setup_affinity(irq, desc, mask);
800 1013
801 } else if ((new->flags & IRQF_TRIGGER_MASK) 1014 } else if (new->flags & IRQF_TRIGGER_MASK) {
802 && (new->flags & IRQF_TRIGGER_MASK) 1015 unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
803 != (desc->status & IRQ_TYPE_SENSE_MASK)) { 1016 unsigned int omsk = irq_settings_get_trigger_mask(desc);
804 /* hope the handler works with the actual trigger mode... */ 1017
805 pr_warning("IRQ %d uses trigger mode %d; requested %d\n", 1018 if (nmsk != omsk)
806 irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK), 1019 /* hope the handler works with current trigger mode */
807 (int)(new->flags & IRQF_TRIGGER_MASK)); 1020 pr_warning("IRQ %d uses trigger mode %u; requested %u\n",
1021 irq, nmsk, omsk);
808 } 1022 }
809 1023
810 new->irq = irq; 1024 new->irq = irq;
@@ -818,8 +1032,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
818 * Check whether we disabled the irq via the spurious handler 1032 * Check whether we disabled the irq via the spurious handler
819 * before. Reenable it and give it another chance. 1033 * before. Reenable it and give it another chance.
820 */ 1034 */
821 if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) { 1035 if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
822 desc->status &= ~IRQ_SPURIOUS_DISABLED; 1036 desc->istate &= ~IRQS_SPURIOUS_DISABLED;
823 __enable_irq(desc, irq, false); 1037 __enable_irq(desc, irq, false);
824 } 1038 }
825 1039
@@ -849,6 +1063,9 @@ mismatch:
849#endif 1063#endif
850 ret = -EBUSY; 1064 ret = -EBUSY;
851 1065
1066out_mask:
1067 free_cpumask_var(mask);
1068
852out_thread: 1069out_thread:
853 raw_spin_unlock_irqrestore(&desc->lock, flags); 1070 raw_spin_unlock_irqrestore(&desc->lock, flags);
854 if (new->thread) { 1071 if (new->thread) {
@@ -871,9 +1088,14 @@ out_thread:
871 */ 1088 */
872int setup_irq(unsigned int irq, struct irqaction *act) 1089int setup_irq(unsigned int irq, struct irqaction *act)
873{ 1090{
1091 int retval;
874 struct irq_desc *desc = irq_to_desc(irq); 1092 struct irq_desc *desc = irq_to_desc(irq);
875 1093
876 return __setup_irq(irq, desc, act); 1094 chip_bus_lock(desc);
1095 retval = __setup_irq(irq, desc, act);
1096 chip_bus_sync_unlock(desc);
1097
1098 return retval;
877} 1099}
878EXPORT_SYMBOL_GPL(setup_irq); 1100EXPORT_SYMBOL_GPL(setup_irq);
879 1101
@@ -924,13 +1146,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
924#endif 1146#endif
925 1147
926 /* If this was the last handler, shut down the IRQ line: */ 1148 /* If this was the last handler, shut down the IRQ line: */
927 if (!desc->action) { 1149 if (!desc->action)
928 desc->status |= IRQ_DISABLED; 1150 irq_shutdown(desc);
929 if (desc->irq_data.chip->irq_shutdown)
930 desc->irq_data.chip->irq_shutdown(&desc->irq_data);
931 else
932 desc->irq_data.chip->irq_disable(&desc->irq_data);
933 }
934 1151
935#ifdef CONFIG_SMP 1152#ifdef CONFIG_SMP
936 /* make sure affinity_hint is cleaned up */ 1153 /* make sure affinity_hint is cleaned up */
@@ -1004,6 +1221,11 @@ void free_irq(unsigned int irq, void *dev_id)
1004 if (!desc) 1221 if (!desc)
1005 return; 1222 return;
1006 1223
1224#ifdef CONFIG_SMP
1225 if (WARN_ON(desc->affinity_notify))
1226 desc->affinity_notify = NULL;
1227#endif
1228
1007 chip_bus_lock(desc); 1229 chip_bus_lock(desc);
1008 kfree(__free_irq(irq, dev_id)); 1230 kfree(__free_irq(irq, dev_id));
1009 chip_bus_sync_unlock(desc); 1231 chip_bus_sync_unlock(desc);
@@ -1074,7 +1296,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1074 if (!desc) 1296 if (!desc)
1075 return -EINVAL; 1297 return -EINVAL;
1076 1298
1077 if (desc->status & IRQ_NOREQUEST) 1299 if (!irq_settings_can_request(desc))
1078 return -EINVAL; 1300 return -EINVAL;
1079 1301
1080 if (!handler) { 1302 if (!handler) {
@@ -1100,7 +1322,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1100 if (retval) 1322 if (retval)
1101 kfree(action); 1323 kfree(action);
1102 1324
1103#ifdef CONFIG_DEBUG_SHIRQ 1325#ifdef CONFIG_DEBUG_SHIRQ_FIXME
1104 if (!retval && (irqflags & IRQF_SHARED)) { 1326 if (!retval && (irqflags & IRQF_SHARED)) {
1105 /* 1327 /*
1106 * It's a shared IRQ -- the driver ought to be prepared for it 1328 * It's a shared IRQ -- the driver ought to be prepared for it
@@ -1149,7 +1371,7 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
1149 if (!desc) 1371 if (!desc)
1150 return -EINVAL; 1372 return -EINVAL;
1151 1373
1152 if (desc->status & IRQ_NESTED_THREAD) { 1374 if (irq_settings_is_nested_thread(desc)) {
1153 ret = request_threaded_irq(irq, NULL, handler, 1375 ret = request_threaded_irq(irq, NULL, handler,
1154 flags, name, dev_id); 1376 flags, name, dev_id);
1155 return !ret ? IRQC_IS_NESTED : ret; 1377 return !ret ? IRQC_IS_NESTED : ret;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 441fd629ff04..ec4806d4778b 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,23 +4,23 @@
4 4
5#include "internals.h" 5#include "internals.h"
6 6
7void move_masked_irq(int irq) 7void irq_move_masked_irq(struct irq_data *idata)
8{ 8{
9 struct irq_desc *desc = irq_to_desc(irq); 9 struct irq_desc *desc = irq_data_to_desc(idata);
10 struct irq_chip *chip = desc->irq_data.chip; 10 struct irq_chip *chip = idata->chip;
11 11
12 if (likely(!(desc->status & IRQ_MOVE_PENDING))) 12 if (likely(!irqd_is_setaffinity_pending(&desc->irq_data)))
13 return; 13 return;
14 14
15 /* 15 /*
16 * Paranoia: cpu-local interrupts shouldn't be calling in here anyway. 16 * Paranoia: cpu-local interrupts shouldn't be calling in here anyway.
17 */ 17 */
18 if (CHECK_IRQ_PER_CPU(desc->status)) { 18 if (!irqd_can_balance(&desc->irq_data)) {
19 WARN_ON(1); 19 WARN_ON(1);
20 return; 20 return;
21 } 21 }
22 22
23 desc->status &= ~IRQ_MOVE_PENDING; 23 irqd_clr_move_pending(&desc->irq_data);
24 24
25 if (unlikely(cpumask_empty(desc->pending_mask))) 25 if (unlikely(cpumask_empty(desc->pending_mask)))
26 return; 26 return;
@@ -53,15 +53,20 @@ void move_masked_irq(int irq)
53 cpumask_clear(desc->pending_mask); 53 cpumask_clear(desc->pending_mask);
54} 54}
55 55
56void move_native_irq(int irq) 56void move_masked_irq(int irq)
57{
58 irq_move_masked_irq(irq_get_irq_data(irq));
59}
60
61void irq_move_irq(struct irq_data *idata)
57{ 62{
58 struct irq_desc *desc = irq_to_desc(irq); 63 struct irq_desc *desc = irq_data_to_desc(idata);
59 bool masked; 64 bool masked;
60 65
61 if (likely(!(desc->status & IRQ_MOVE_PENDING))) 66 if (likely(!irqd_is_setaffinity_pending(idata)))
62 return; 67 return;
63 68
64 if (unlikely(desc->status & IRQ_DISABLED)) 69 if (unlikely(desc->istate & IRQS_DISABLED))
65 return; 70 return;
66 71
67 /* 72 /*
@@ -69,10 +74,15 @@ void move_native_irq(int irq)
69 * threaded interrupt with ONESHOT set, we can end up with an 74 * threaded interrupt with ONESHOT set, we can end up with an
70 * interrupt storm. 75 * interrupt storm.
71 */ 76 */
72 masked = desc->status & IRQ_MASKED; 77 masked = desc->istate & IRQS_MASKED;
73 if (!masked) 78 if (!masked)
74 desc->irq_data.chip->irq_mask(&desc->irq_data); 79 idata->chip->irq_mask(idata);
75 move_masked_irq(irq); 80 irq_move_masked_irq(idata);
76 if (!masked) 81 if (!masked)
77 desc->irq_data.chip->irq_unmask(&desc->irq_data); 82 idata->chip->irq_unmask(idata);
83}
84
85void move_native_irq(int irq)
86{
87 irq_move_irq(irq_get_irq_data(irq));
78} 88}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 0d4005d85b03..f76fc00c9877 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -18,7 +18,7 @@
18 * During system-wide suspend or hibernation device drivers need to be prevented 18 * During system-wide suspend or hibernation device drivers need to be prevented
19 * from receiving interrupts and this function is provided for this purpose. 19 * from receiving interrupts and this function is provided for this purpose.
20 * It marks all interrupt lines in use, except for the timer ones, as disabled 20 * It marks all interrupt lines in use, except for the timer ones, as disabled
21 * and sets the IRQ_SUSPENDED flag for each of them. 21 * and sets the IRQS_SUSPENDED flag for each of them.
22 */ 22 */
23void suspend_device_irqs(void) 23void suspend_device_irqs(void)
24{ 24{
@@ -34,7 +34,7 @@ void suspend_device_irqs(void)
34 } 34 }
35 35
36 for_each_irq_desc(irq, desc) 36 for_each_irq_desc(irq, desc)
37 if (desc->status & IRQ_SUSPENDED) 37 if (desc->istate & IRQS_SUSPENDED)
38 synchronize_irq(irq); 38 synchronize_irq(irq);
39} 39}
40EXPORT_SYMBOL_GPL(suspend_device_irqs); 40EXPORT_SYMBOL_GPL(suspend_device_irqs);
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs);
43 * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() 43 * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs()
44 * 44 *
45 * Enable all interrupt lines previously disabled by suspend_device_irqs() that 45 * Enable all interrupt lines previously disabled by suspend_device_irqs() that
46 * have the IRQ_SUSPENDED flag set. 46 * have the IRQS_SUSPENDED flag set.
47 */ 47 */
48void resume_device_irqs(void) 48void resume_device_irqs(void)
49{ 49{
@@ -53,9 +53,6 @@ void resume_device_irqs(void)
53 for_each_irq_desc(irq, desc) { 53 for_each_irq_desc(irq, desc) {
54 unsigned long flags; 54 unsigned long flags;
55 55
56 if (!(desc->status & IRQ_SUSPENDED))
57 continue;
58
59 raw_spin_lock_irqsave(&desc->lock, flags); 56 raw_spin_lock_irqsave(&desc->lock, flags);
60 __enable_irq(desc, irq, true); 57 __enable_irq(desc, irq, true);
61 raw_spin_unlock_irqrestore(&desc->lock, flags); 58 raw_spin_unlock_irqrestore(&desc->lock, flags);
@@ -71,9 +68,24 @@ int check_wakeup_irqs(void)
71 struct irq_desc *desc; 68 struct irq_desc *desc;
72 int irq; 69 int irq;
73 70
74 for_each_irq_desc(irq, desc) 71 for_each_irq_desc(irq, desc) {
75 if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING)) 72 if (irqd_is_wakeup_set(&desc->irq_data)) {
76 return -EBUSY; 73 if (desc->istate & IRQS_PENDING)
74 return -EBUSY;
75 continue;
76 }
77 /*
78 * Check the non wakeup interrupts whether they need
79 * to be masked before finally going into suspend
80 * state. That's for hardware which has no wakeup
81 * source configuration facility. The chip
82 * implementation indicates that with
83 * IRQCHIP_MASK_ON_SUSPEND.
84 */
85 if (desc->istate & IRQS_SUSPENDED &&
86 irq_desc_get_chip(desc)->flags & IRQCHIP_MASK_ON_SUSPEND)
87 mask_irq(desc);
88 }
77 89
78 return 0; 90 return 0;
79} 91}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6c8a2a9f8a7b..4cc2e5ed0bec 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -11,6 +11,7 @@
11#include <linux/proc_fs.h> 11#include <linux/proc_fs.h>
12#include <linux/seq_file.h> 12#include <linux/seq_file.h>
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/kernel_stat.h>
14 15
15#include "internals.h" 16#include "internals.h"
16 17
@@ -24,7 +25,7 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
24 const struct cpumask *mask = desc->irq_data.affinity; 25 const struct cpumask *mask = desc->irq_data.affinity;
25 26
26#ifdef CONFIG_GENERIC_PENDING_IRQ 27#ifdef CONFIG_GENERIC_PENDING_IRQ
27 if (desc->status & IRQ_MOVE_PENDING) 28 if (irqd_is_setaffinity_pending(&desc->irq_data))
28 mask = desc->pending_mask; 29 mask = desc->pending_mask;
29#endif 30#endif
30 seq_cpumask(m, mask); 31 seq_cpumask(m, mask);
@@ -65,8 +66,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
65 cpumask_var_t new_value; 66 cpumask_var_t new_value;
66 int err; 67 int err;
67 68
68 if (!irq_to_desc(irq)->irq_data.chip->irq_set_affinity || no_irq_affinity || 69 if (!irq_can_set_affinity(irq) || no_irq_affinity)
69 irq_balancing_disabled(irq))
70 return -EIO; 70 return -EIO;
71 71
72 if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) 72 if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
@@ -89,7 +89,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
89 if (!cpumask_intersects(new_value, cpu_online_mask)) { 89 if (!cpumask_intersects(new_value, cpu_online_mask)) {
90 /* Special case for empty set - allow the architecture 90 /* Special case for empty set - allow the architecture
91 code to set default SMP affinity. */ 91 code to set default SMP affinity. */
92 err = irq_select_affinity_usr(irq) ? -EINVAL : count; 92 err = irq_select_affinity_usr(irq, new_value) ? -EINVAL : count;
93 } else { 93 } else {
94 irq_set_affinity(irq, new_value); 94 irq_set_affinity(irq, new_value);
95 err = count; 95 err = count;
@@ -357,3 +357,65 @@ void init_irq_proc(void)
357 } 357 }
358} 358}
359 359
360#ifdef CONFIG_GENERIC_IRQ_SHOW
361
362int __weak arch_show_interrupts(struct seq_file *p, int prec)
363{
364 return 0;
365}
366
367int show_interrupts(struct seq_file *p, void *v)
368{
369 static int prec;
370
371 unsigned long flags, any_count = 0;
372 int i = *(loff_t *) v, j;
373 struct irqaction *action;
374 struct irq_desc *desc;
375
376 if (i > nr_irqs)
377 return 0;
378
379 if (i == nr_irqs)
380 return arch_show_interrupts(p, prec);
381
382 /* print header and calculate the width of the first column */
383 if (i == 0) {
384 for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
385 j *= 10;
386
387 seq_printf(p, "%*s", prec + 8, "");
388 for_each_online_cpu(j)
389 seq_printf(p, "CPU%-8d", j);
390 seq_putc(p, '\n');
391 }
392
393 desc = irq_to_desc(i);
394 if (!desc)
395 return 0;
396
397 raw_spin_lock_irqsave(&desc->lock, flags);
398 for_each_online_cpu(j)
399 any_count |= kstat_irqs_cpu(i, j);
400 action = desc->action;
401 if (!action && !any_count)
402 goto out;
403
404 seq_printf(p, "%*d: ", prec, i);
405 for_each_online_cpu(j)
406 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
407 seq_printf(p, " %8s", desc->irq_data.chip->name);
408 seq_printf(p, "-%-8s", desc->name);
409
410 if (action) {
411 seq_printf(p, " %s", action->name);
412 while ((action = action->next) != NULL)
413 seq_printf(p, ", %s", action->name);
414 }
415
416 seq_putc(p, '\n');
417out:
418 raw_spin_unlock_irqrestore(&desc->lock, flags);
419 return 0;
420}
421#endif
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 891115a929aa..ad683a99b1ec 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -23,7 +23,7 @@
23#ifdef CONFIG_HARDIRQS_SW_RESEND 23#ifdef CONFIG_HARDIRQS_SW_RESEND
24 24
25/* Bitmap to handle software resend of interrupts: */ 25/* Bitmap to handle software resend of interrupts: */
26static DECLARE_BITMAP(irqs_resend, NR_IRQS); 26static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
27 27
28/* 28/*
29 * Run software resends of IRQ's 29 * Run software resends of IRQ's
@@ -55,20 +55,19 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
55 */ 55 */
56void check_irq_resend(struct irq_desc *desc, unsigned int irq) 56void check_irq_resend(struct irq_desc *desc, unsigned int irq)
57{ 57{
58 unsigned int status = desc->status;
59
60 /*
61 * Make sure the interrupt is enabled, before resending it:
62 */
63 desc->irq_data.chip->irq_enable(&desc->irq_data);
64
65 /* 58 /*
66 * We do not resend level type interrupts. Level type 59 * We do not resend level type interrupts. Level type
67 * interrupts are resent by hardware when they are still 60 * interrupts are resent by hardware when they are still
68 * active. 61 * active.
69 */ 62 */
70 if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { 63 if (irq_settings_is_level(desc))
71 desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; 64 return;
65 if (desc->istate & IRQS_REPLAY)
66 return;
67 if (desc->istate & IRQS_PENDING) {
68 irq_compat_clr_pending(desc);
69 desc->istate &= ~IRQS_PENDING;
70 desc->istate |= IRQS_REPLAY;
72 71
73 if (!desc->irq_data.chip->irq_retrigger || 72 if (!desc->irq_data.chip->irq_retrigger ||
74 !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { 73 !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) {
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
new file mode 100644
index 000000000000..0227ad358272
--- /dev/null
+++ b/kernel/irq/settings.h
@@ -0,0 +1,138 @@
1/*
2 * Internal header to deal with irq_desc->status which will be renamed
3 * to irq_desc->settings.
4 */
5enum {
6 _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS,
7 _IRQ_PER_CPU = IRQ_PER_CPU,
8 _IRQ_LEVEL = IRQ_LEVEL,
9 _IRQ_NOPROBE = IRQ_NOPROBE,
10 _IRQ_NOREQUEST = IRQ_NOREQUEST,
11 _IRQ_NOAUTOEN = IRQ_NOAUTOEN,
12 _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT,
13 _IRQ_NO_BALANCING = IRQ_NO_BALANCING,
14 _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
15 _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
16};
17
18#define IRQ_INPROGRESS GOT_YOU_MORON
19#define IRQ_REPLAY GOT_YOU_MORON
20#define IRQ_WAITING GOT_YOU_MORON
21#define IRQ_DISABLED GOT_YOU_MORON
22#define IRQ_PENDING GOT_YOU_MORON
23#define IRQ_MASKED GOT_YOU_MORON
24#define IRQ_WAKEUP GOT_YOU_MORON
25#define IRQ_MOVE_PENDING GOT_YOU_MORON
26#define IRQ_PER_CPU GOT_YOU_MORON
27#define IRQ_NO_BALANCING GOT_YOU_MORON
28#define IRQ_AFFINITY_SET GOT_YOU_MORON
29#define IRQ_LEVEL GOT_YOU_MORON
30#define IRQ_NOPROBE GOT_YOU_MORON
31#define IRQ_NOREQUEST GOT_YOU_MORON
32#define IRQ_NOAUTOEN GOT_YOU_MORON
33#define IRQ_NESTED_THREAD GOT_YOU_MORON
34#undef IRQF_MODIFY_MASK
35#define IRQF_MODIFY_MASK GOT_YOU_MORON
36
37static inline void
38irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
39{
40 desc->status &= ~(clr & _IRQF_MODIFY_MASK);
41 desc->status |= (set & _IRQF_MODIFY_MASK);
42}
43
44static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
45{
46 return desc->status & _IRQ_PER_CPU;
47}
48
49static inline void irq_settings_set_per_cpu(struct irq_desc *desc)
50{
51 desc->status |= _IRQ_PER_CPU;
52}
53
54static inline void irq_settings_set_no_balancing(struct irq_desc *desc)
55{
56 desc->status |= _IRQ_NO_BALANCING;
57}
58
59static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc)
60{
61 return desc->status & _IRQ_NO_BALANCING;
62}
63
64static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc)
65{
66 return desc->status & IRQ_TYPE_SENSE_MASK;
67}
68
69static inline void
70irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask)
71{
72 desc->status &= ~IRQ_TYPE_SENSE_MASK;
73 desc->status |= mask & IRQ_TYPE_SENSE_MASK;
74}
75
76static inline bool irq_settings_is_level(struct irq_desc *desc)
77{
78 return desc->status & _IRQ_LEVEL;
79}
80
81static inline void irq_settings_clr_level(struct irq_desc *desc)
82{
83 desc->status &= ~_IRQ_LEVEL;
84}
85
86static inline void irq_settings_set_level(struct irq_desc *desc)
87{
88 desc->status |= _IRQ_LEVEL;
89}
90
91static inline bool irq_settings_can_request(struct irq_desc *desc)
92{
93 return !(desc->status & _IRQ_NOREQUEST);
94}
95
96static inline void irq_settings_clr_norequest(struct irq_desc *desc)
97{
98 desc->status &= ~_IRQ_NOREQUEST;
99}
100
101static inline void irq_settings_set_norequest(struct irq_desc *desc)
102{
103 desc->status |= _IRQ_NOREQUEST;
104}
105
106static inline bool irq_settings_can_probe(struct irq_desc *desc)
107{
108 return !(desc->status & _IRQ_NOPROBE);
109}
110
111static inline void irq_settings_clr_noprobe(struct irq_desc *desc)
112{
113 desc->status &= ~_IRQ_NOPROBE;
114}
115
116static inline void irq_settings_set_noprobe(struct irq_desc *desc)
117{
118 desc->status |= _IRQ_NOPROBE;
119}
120
121static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc)
122{
123 return desc->status & _IRQ_MOVE_PCNTXT;
124}
125
126static inline bool irq_settings_can_autoenable(struct irq_desc *desc)
127{
128 return !(desc->status & _IRQ_NOAUTOEN);
129}
130
131static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
132{
133 return desc->status & _IRQ_NESTED_THREAD;
134}
135
136/* Nothing should touch desc->status from now on */
137#undef status
138#define status USE_THE_PROPER_WRAPPERS_YOU_MORON
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3089d3b9d5f3..dd586ebf9c8c 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -21,70 +21,94 @@ static int irqfixup __read_mostly;
21#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) 21#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
22static void poll_spurious_irqs(unsigned long dummy); 22static void poll_spurious_irqs(unsigned long dummy);
23static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0); 23static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
24static int irq_poll_cpu;
25static atomic_t irq_poll_active;
26
27/*
28 * We wait here for a poller to finish.
29 *
30 * If the poll runs on this CPU, then we yell loudly and return
31 * false. That will leave the interrupt line disabled in the worst
32 * case, but it should never happen.
33 *
34 * We wait until the poller is done and then recheck disabled and
35 * action (about to be disabled). Only if it's still active, we return
36 * true and let the handler run.
37 */
38bool irq_wait_for_poll(struct irq_desc *desc)
39{
40 if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
41 "irq poll in progress on cpu %d for irq %d\n",
42 smp_processor_id(), desc->irq_data.irq))
43 return false;
44
45#ifdef CONFIG_SMP
46 do {
47 raw_spin_unlock(&desc->lock);
48 while (desc->istate & IRQS_INPROGRESS)
49 cpu_relax();
50 raw_spin_lock(&desc->lock);
51 } while (desc->istate & IRQS_INPROGRESS);
52 /* Might have been disabled in meantime */
53 return !(desc->istate & IRQS_DISABLED) && desc->action;
54#else
55 return false;
56#endif
57}
58
24 59
25/* 60/*
26 * Recovery handler for misrouted interrupts. 61 * Recovery handler for misrouted interrupts.
27 */ 62 */
28static int try_one_irq(int irq, struct irq_desc *desc) 63static int try_one_irq(int irq, struct irq_desc *desc, bool force)
29{ 64{
65 irqreturn_t ret = IRQ_NONE;
30 struct irqaction *action; 66 struct irqaction *action;
31 int ok = 0, work = 0;
32 67
33 raw_spin_lock(&desc->lock); 68 raw_spin_lock(&desc->lock);
34 /* Already running on another processor */
35 if (desc->status & IRQ_INPROGRESS) {
36 /*
37 * Already running: If it is shared get the other
38 * CPU to go looking for our mystery interrupt too
39 */
40 if (desc->action && (desc->action->flags & IRQF_SHARED))
41 desc->status |= IRQ_PENDING;
42 raw_spin_unlock(&desc->lock);
43 return ok;
44 }
45 /* Honour the normal IRQ locking */
46 desc->status |= IRQ_INPROGRESS;
47 action = desc->action;
48 raw_spin_unlock(&desc->lock);
49 69
50 while (action) { 70 /* PER_CPU and nested thread interrupts are never polled */
51 /* Only shared IRQ handlers are safe to call */ 71 if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc))
52 if (action->flags & IRQF_SHARED) { 72 goto out;
53 if (action->handler(irq, action->dev_id) ==
54 IRQ_HANDLED)
55 ok = 1;
56 }
57 action = action->next;
58 }
59 local_irq_disable();
60 /* Now clean up the flags */
61 raw_spin_lock(&desc->lock);
62 action = desc->action;
63 73
64 /* 74 /*
65 * While we were looking for a fixup someone queued a real 75 * Do not poll disabled interrupts unless the spurious
66 * IRQ clashing with our walk: 76 * disabled poller asks explicitely.
67 */ 77 */
68 while ((desc->status & IRQ_PENDING) && action) { 78 if ((desc->istate & IRQS_DISABLED) && !force)
79 goto out;
80
81 /*
82 * All handlers must agree on IRQF_SHARED, so we test just the
83 * first. Check for action->next as well.
84 */
85 action = desc->action;
86 if (!action || !(action->flags & IRQF_SHARED) ||
87 (action->flags & __IRQF_TIMER) || !action->next)
88 goto out;
89
90 /* Already running on another processor */
91 if (desc->istate & IRQS_INPROGRESS) {
69 /* 92 /*
70 * Perform real IRQ processing for the IRQ we deferred 93 * Already running: If it is shared get the other
94 * CPU to go looking for our mystery interrupt too
71 */ 95 */
72 work = 1; 96 irq_compat_set_pending(desc);
73 raw_spin_unlock(&desc->lock); 97 desc->istate |= IRQS_PENDING;
74 handle_IRQ_event(irq, action); 98 goto out;
75 raw_spin_lock(&desc->lock);
76 desc->status &= ~IRQ_PENDING;
77 } 99 }
78 desc->status &= ~IRQ_INPROGRESS;
79 /*
80 * If we did actual work for the real IRQ line we must let the
81 * IRQ controller clean up too
82 */
83 if (work)
84 irq_end(irq, desc);
85 raw_spin_unlock(&desc->lock);
86 100
87 return ok; 101 /* Mark it poll in progress */
102 desc->istate |= IRQS_POLL_INPROGRESS;
103 do {
104 if (handle_irq_event(desc) == IRQ_HANDLED)
105 ret = IRQ_HANDLED;
106 action = desc->action;
107 } while ((desc->istate & IRQS_PENDING) && action);
108 desc->istate &= ~IRQS_POLL_INPROGRESS;
109out:
110 raw_spin_unlock(&desc->lock);
111 return ret == IRQ_HANDLED;
88} 112}
89 113
90static int misrouted_irq(int irq) 114static int misrouted_irq(int irq)
@@ -92,6 +116,11 @@ static int misrouted_irq(int irq)
92 struct irq_desc *desc; 116 struct irq_desc *desc;
93 int i, ok = 0; 117 int i, ok = 0;
94 118
119 if (atomic_inc_return(&irq_poll_active) == 1)
120 goto out;
121
122 irq_poll_cpu = smp_processor_id();
123
95 for_each_irq_desc(i, desc) { 124 for_each_irq_desc(i, desc) {
96 if (!i) 125 if (!i)
97 continue; 126 continue;
@@ -99,9 +128,11 @@ static int misrouted_irq(int irq)
99 if (i == irq) /* Already tried */ 128 if (i == irq) /* Already tried */
100 continue; 129 continue;
101 130
102 if (try_one_irq(i, desc)) 131 if (try_one_irq(i, desc, false))
103 ok = 1; 132 ok = 1;
104 } 133 }
134out:
135 atomic_dec(&irq_poll_active);
105 /* So the caller can adjust the irq error counts */ 136 /* So the caller can adjust the irq error counts */
106 return ok; 137 return ok;
107} 138}
@@ -111,23 +142,28 @@ static void poll_spurious_irqs(unsigned long dummy)
111 struct irq_desc *desc; 142 struct irq_desc *desc;
112 int i; 143 int i;
113 144
145 if (atomic_inc_return(&irq_poll_active) != 1)
146 goto out;
147 irq_poll_cpu = smp_processor_id();
148
114 for_each_irq_desc(i, desc) { 149 for_each_irq_desc(i, desc) {
115 unsigned int status; 150 unsigned int state;
116 151
117 if (!i) 152 if (!i)
118 continue; 153 continue;
119 154
120 /* Racy but it doesn't matter */ 155 /* Racy but it doesn't matter */
121 status = desc->status; 156 state = desc->istate;
122 barrier(); 157 barrier();
123 if (!(status & IRQ_SPURIOUS_DISABLED)) 158 if (!(state & IRQS_SPURIOUS_DISABLED))
124 continue; 159 continue;
125 160
126 local_irq_disable(); 161 local_irq_disable();
127 try_one_irq(i, desc); 162 try_one_irq(i, desc, true);
128 local_irq_enable(); 163 local_irq_enable();
129 } 164 }
130 165out:
166 atomic_dec(&irq_poll_active);
131 mod_timer(&poll_spurious_irq_timer, 167 mod_timer(&poll_spurious_irq_timer,
132 jiffies + POLL_SPURIOUS_IRQ_INTERVAL); 168 jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
133} 169}
@@ -139,15 +175,13 @@ static void poll_spurious_irqs(unsigned long dummy)
139 * 175 *
140 * (The other 100-of-100,000 interrupts may have been a correctly 176 * (The other 100-of-100,000 interrupts may have been a correctly
141 * functioning device sharing an IRQ with the failing one) 177 * functioning device sharing an IRQ with the failing one)
142 *
143 * Called under desc->lock
144 */ 178 */
145
146static void 179static void
147__report_bad_irq(unsigned int irq, struct irq_desc *desc, 180__report_bad_irq(unsigned int irq, struct irq_desc *desc,
148 irqreturn_t action_ret) 181 irqreturn_t action_ret)
149{ 182{
150 struct irqaction *action; 183 struct irqaction *action;
184 unsigned long flags;
151 185
152 if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) { 186 if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
153 printk(KERN_ERR "irq event %d: bogus return value %x\n", 187 printk(KERN_ERR "irq event %d: bogus return value %x\n",
@@ -159,6 +193,13 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc,
159 dump_stack(); 193 dump_stack();
160 printk(KERN_ERR "handlers:\n"); 194 printk(KERN_ERR "handlers:\n");
161 195
196 /*
197 * We need to take desc->lock here. note_interrupt() is called
198 * w/o desc->lock held, but IRQ_PROGRESS set. We might race
199 * with something else removing an action. It's ok to take
200 * desc->lock here. See synchronize_irq().
201 */
202 raw_spin_lock_irqsave(&desc->lock, flags);
162 action = desc->action; 203 action = desc->action;
163 while (action) { 204 while (action) {
164 printk(KERN_ERR "[<%p>]", action->handler); 205 printk(KERN_ERR "[<%p>]", action->handler);
@@ -167,6 +208,7 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc,
167 printk("\n"); 208 printk("\n");
168 action = action->next; 209 action = action->next;
169 } 210 }
211 raw_spin_unlock_irqrestore(&desc->lock, flags);
170} 212}
171 213
172static void 214static void
@@ -218,6 +260,9 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
218void note_interrupt(unsigned int irq, struct irq_desc *desc, 260void note_interrupt(unsigned int irq, struct irq_desc *desc,
219 irqreturn_t action_ret) 261 irqreturn_t action_ret)
220{ 262{
263 if (desc->istate & IRQS_POLL_INPROGRESS)
264 return;
265
221 if (unlikely(action_ret != IRQ_HANDLED)) { 266 if (unlikely(action_ret != IRQ_HANDLED)) {
222 /* 267 /*
223 * If we are seeing only the odd spurious IRQ caused by 268 * If we are seeing only the odd spurious IRQ caused by
@@ -254,9 +299,9 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
254 * Now kill the IRQ 299 * Now kill the IRQ
255 */ 300 */
256 printk(KERN_EMERG "Disabling IRQ #%d\n", irq); 301 printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
257 desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED; 302 desc->istate |= IRQS_SPURIOUS_DISABLED;
258 desc->depth++; 303 desc->depth++;
259 desc->irq_data.chip->irq_disable(&desc->irq_data); 304 irq_disable(desc);
260 305
261 mod_timer(&poll_spurious_irq_timer, 306 mod_timer(&poll_spurious_irq_timer,
262 jiffies + POLL_SPURIOUS_IRQ_INTERVAL); 307 jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 999835b6112b..ed253aa24ba4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,13 +38,96 @@
38 38
39#include <asm/irq_regs.h> 39#include <asm/irq_regs.h>
40 40
41struct remote_function_call {
42 struct task_struct *p;
43 int (*func)(void *info);
44 void *info;
45 int ret;
46};
47
48static void remote_function(void *data)
49{
50 struct remote_function_call *tfc = data;
51 struct task_struct *p = tfc->p;
52
53 if (p) {
54 tfc->ret = -EAGAIN;
55 if (task_cpu(p) != smp_processor_id() || !task_curr(p))
56 return;
57 }
58
59 tfc->ret = tfc->func(tfc->info);
60}
61
62/**
63 * task_function_call - call a function on the cpu on which a task runs
64 * @p: the task to evaluate
65 * @func: the function to be called
66 * @info: the function call argument
67 *
68 * Calls the function @func when the task is currently running. This might
69 * be on the current CPU, which just calls the function directly
70 *
71 * returns: @func return value, or
72 * -ESRCH - when the process isn't running
73 * -EAGAIN - when the process moved away
74 */
75static int
76task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
77{
78 struct remote_function_call data = {
79 .p = p,
80 .func = func,
81 .info = info,
82 .ret = -ESRCH, /* No such (running) process */
83 };
84
85 if (task_curr(p))
86 smp_call_function_single(task_cpu(p), remote_function, &data, 1);
87
88 return data.ret;
89}
90
91/**
92 * cpu_function_call - call a function on the cpu
93 * @func: the function to be called
94 * @info: the function call argument
95 *
96 * Calls the function @func on the remote cpu.
97 *
98 * returns: @func return value or -ENXIO when the cpu is offline
99 */
100static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
101{
102 struct remote_function_call data = {
103 .p = NULL,
104 .func = func,
105 .info = info,
106 .ret = -ENXIO, /* No such CPU */
107 };
108
109 smp_call_function_single(cpu, remote_function, &data, 1);
110
111 return data.ret;
112}
113
114#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
115 PERF_FLAG_FD_OUTPUT |\
116 PERF_FLAG_PID_CGROUP)
117
41enum event_type_t { 118enum event_type_t {
42 EVENT_FLEXIBLE = 0x1, 119 EVENT_FLEXIBLE = 0x1,
43 EVENT_PINNED = 0x2, 120 EVENT_PINNED = 0x2,
44 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, 121 EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
45}; 122};
46 123
47atomic_t perf_task_events __read_mostly; 124/*
125 * perf_sched_events : >0 events exist
126 * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
127 */
128atomic_t perf_sched_events __read_mostly;
129static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
130
48static atomic_t nr_mmap_events __read_mostly; 131static atomic_t nr_mmap_events __read_mostly;
49static atomic_t nr_comm_events __read_mostly; 132static atomic_t nr_comm_events __read_mostly;
50static atomic_t nr_task_events __read_mostly; 133static atomic_t nr_task_events __read_mostly;
@@ -67,7 +150,24 @@ int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
67/* 150/*
68 * max perf event sample rate 151 * max perf event sample rate
69 */ 152 */
70int sysctl_perf_event_sample_rate __read_mostly = 100000; 153#define DEFAULT_MAX_SAMPLE_RATE 100000
154int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
155static int max_samples_per_tick __read_mostly =
156 DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
157
158int perf_proc_update_handler(struct ctl_table *table, int write,
159 void __user *buffer, size_t *lenp,
160 loff_t *ppos)
161{
162 int ret = proc_dointvec(table, write, buffer, lenp, ppos);
163
164 if (ret || !write)
165 return ret;
166
167 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
168
169 return 0;
170}
71 171
72static atomic64_t perf_event_id; 172static atomic64_t perf_event_id;
73 173
@@ -75,7 +175,11 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
75 enum event_type_t event_type); 175 enum event_type_t event_type);
76 176
77static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, 177static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
78 enum event_type_t event_type); 178 enum event_type_t event_type,
179 struct task_struct *task);
180
181static void update_context_time(struct perf_event_context *ctx);
182static u64 perf_event_time(struct perf_event *event);
79 183
80void __weak perf_event_print_debug(void) { } 184void __weak perf_event_print_debug(void) { }
81 185
@@ -89,6 +193,360 @@ static inline u64 perf_clock(void)
89 return local_clock(); 193 return local_clock();
90} 194}
91 195
196static inline struct perf_cpu_context *
197__get_cpu_context(struct perf_event_context *ctx)
198{
199 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
200}
201
202#ifdef CONFIG_CGROUP_PERF
203
204/*
205 * Must ensure cgroup is pinned (css_get) before calling
206 * this function. In other words, we cannot call this function
207 * if there is no cgroup event for the current CPU context.
208 */
209static inline struct perf_cgroup *
210perf_cgroup_from_task(struct task_struct *task)
211{
212 return container_of(task_subsys_state(task, perf_subsys_id),
213 struct perf_cgroup, css);
214}
215
216static inline bool
217perf_cgroup_match(struct perf_event *event)
218{
219 struct perf_event_context *ctx = event->ctx;
220 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
221
222 return !event->cgrp || event->cgrp == cpuctx->cgrp;
223}
224
225static inline void perf_get_cgroup(struct perf_event *event)
226{
227 css_get(&event->cgrp->css);
228}
229
230static inline void perf_put_cgroup(struct perf_event *event)
231{
232 css_put(&event->cgrp->css);
233}
234
235static inline void perf_detach_cgroup(struct perf_event *event)
236{
237 perf_put_cgroup(event);
238 event->cgrp = NULL;
239}
240
241static inline int is_cgroup_event(struct perf_event *event)
242{
243 return event->cgrp != NULL;
244}
245
246static inline u64 perf_cgroup_event_time(struct perf_event *event)
247{
248 struct perf_cgroup_info *t;
249
250 t = per_cpu_ptr(event->cgrp->info, event->cpu);
251 return t->time;
252}
253
254static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
255{
256 struct perf_cgroup_info *info;
257 u64 now;
258
259 now = perf_clock();
260
261 info = this_cpu_ptr(cgrp->info);
262
263 info->time += now - info->timestamp;
264 info->timestamp = now;
265}
266
267static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
268{
269 struct perf_cgroup *cgrp_out = cpuctx->cgrp;
270 if (cgrp_out)
271 __update_cgrp_time(cgrp_out);
272}
273
274static inline void update_cgrp_time_from_event(struct perf_event *event)
275{
276 struct perf_cgroup *cgrp;
277
278 /*
279 * ensure we access cgroup data only when needed and
280 * when we know the cgroup is pinned (css_get)
281 */
282 if (!is_cgroup_event(event))
283 return;
284
285 cgrp = perf_cgroup_from_task(current);
286 /*
287 * Do not update time when cgroup is not active
288 */
289 if (cgrp == event->cgrp)
290 __update_cgrp_time(event->cgrp);
291}
292
293static inline void
294perf_cgroup_set_timestamp(struct task_struct *task,
295 struct perf_event_context *ctx)
296{
297 struct perf_cgroup *cgrp;
298 struct perf_cgroup_info *info;
299
300 /*
301 * ctx->lock held by caller
302 * ensure we do not access cgroup data
303 * unless we have the cgroup pinned (css_get)
304 */
305 if (!task || !ctx->nr_cgroups)
306 return;
307
308 cgrp = perf_cgroup_from_task(task);
309 info = this_cpu_ptr(cgrp->info);
310 info->timestamp = ctx->timestamp;
311}
312
313#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */
314#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */
315
316/*
317 * reschedule events based on the cgroup constraint of task.
318 *
319 * mode SWOUT : schedule out everything
320 * mode SWIN : schedule in based on cgroup for next
321 */
322void perf_cgroup_switch(struct task_struct *task, int mode)
323{
324 struct perf_cpu_context *cpuctx;
325 struct pmu *pmu;
326 unsigned long flags;
327
328 /*
329 * disable interrupts to avoid geting nr_cgroup
330 * changes via __perf_event_disable(). Also
331 * avoids preemption.
332 */
333 local_irq_save(flags);
334
335 /*
336 * we reschedule only in the presence of cgroup
337 * constrained events.
338 */
339 rcu_read_lock();
340
341 list_for_each_entry_rcu(pmu, &pmus, entry) {
342
343 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
344
345 perf_pmu_disable(cpuctx->ctx.pmu);
346
347 /*
348 * perf_cgroup_events says at least one
349 * context on this CPU has cgroup events.
350 *
351 * ctx->nr_cgroups reports the number of cgroup
352 * events for a context.
353 */
354 if (cpuctx->ctx.nr_cgroups > 0) {
355
356 if (mode & PERF_CGROUP_SWOUT) {
357 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
358 /*
359 * must not be done before ctxswout due
360 * to event_filter_match() in event_sched_out()
361 */
362 cpuctx->cgrp = NULL;
363 }
364
365 if (mode & PERF_CGROUP_SWIN) {
366 /* set cgrp before ctxsw in to
367 * allow event_filter_match() to not
368 * have to pass task around
369 */
370 cpuctx->cgrp = perf_cgroup_from_task(task);
371 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
372 }
373 }
374
375 perf_pmu_enable(cpuctx->ctx.pmu);
376 }
377
378 rcu_read_unlock();
379
380 local_irq_restore(flags);
381}
382
383static inline void perf_cgroup_sched_out(struct task_struct *task)
384{
385 perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
386}
387
388static inline void perf_cgroup_sched_in(struct task_struct *task)
389{
390 perf_cgroup_switch(task, PERF_CGROUP_SWIN);
391}
392
393static inline int perf_cgroup_connect(int fd, struct perf_event *event,
394 struct perf_event_attr *attr,
395 struct perf_event *group_leader)
396{
397 struct perf_cgroup *cgrp;
398 struct cgroup_subsys_state *css;
399 struct file *file;
400 int ret = 0, fput_needed;
401
402 file = fget_light(fd, &fput_needed);
403 if (!file)
404 return -EBADF;
405
406 css = cgroup_css_from_dir(file, perf_subsys_id);
407 if (IS_ERR(css)) {
408 ret = PTR_ERR(css);
409 goto out;
410 }
411
412 cgrp = container_of(css, struct perf_cgroup, css);
413 event->cgrp = cgrp;
414
415 /* must be done before we fput() the file */
416 perf_get_cgroup(event);
417
418 /*
419 * all events in a group must monitor
420 * the same cgroup because a task belongs
421 * to only one perf cgroup at a time
422 */
423 if (group_leader && group_leader->cgrp != cgrp) {
424 perf_detach_cgroup(event);
425 ret = -EINVAL;
426 }
427out:
428 fput_light(file, fput_needed);
429 return ret;
430}
431
432static inline void
433perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
434{
435 struct perf_cgroup_info *t;
436 t = per_cpu_ptr(event->cgrp->info, event->cpu);
437 event->shadow_ctx_time = now - t->timestamp;
438}
439
440static inline void
441perf_cgroup_defer_enabled(struct perf_event *event)
442{
443 /*
444 * when the current task's perf cgroup does not match
445 * the event's, we need to remember to call the
446 * perf_mark_enable() function the first time a task with
447 * a matching perf cgroup is scheduled in.
448 */
449 if (is_cgroup_event(event) && !perf_cgroup_match(event))
450 event->cgrp_defer_enabled = 1;
451}
452
453static inline void
454perf_cgroup_mark_enabled(struct perf_event *event,
455 struct perf_event_context *ctx)
456{
457 struct perf_event *sub;
458 u64 tstamp = perf_event_time(event);
459
460 if (!event->cgrp_defer_enabled)
461 return;
462
463 event->cgrp_defer_enabled = 0;
464
465 event->tstamp_enabled = tstamp - event->total_time_enabled;
466 list_for_each_entry(sub, &event->sibling_list, group_entry) {
467 if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
468 sub->tstamp_enabled = tstamp - sub->total_time_enabled;
469 sub->cgrp_defer_enabled = 0;
470 }
471 }
472}
473#else /* !CONFIG_CGROUP_PERF */
474
475static inline bool
476perf_cgroup_match(struct perf_event *event)
477{
478 return true;
479}
480
481static inline void perf_detach_cgroup(struct perf_event *event)
482{}
483
484static inline int is_cgroup_event(struct perf_event *event)
485{
486 return 0;
487}
488
489static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
490{
491 return 0;
492}
493
494static inline void update_cgrp_time_from_event(struct perf_event *event)
495{
496}
497
498static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
499{
500}
501
502static inline void perf_cgroup_sched_out(struct task_struct *task)
503{
504}
505
506static inline void perf_cgroup_sched_in(struct task_struct *task)
507{
508}
509
510static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
511 struct perf_event_attr *attr,
512 struct perf_event *group_leader)
513{
514 return -EINVAL;
515}
516
517static inline void
518perf_cgroup_set_timestamp(struct task_struct *task,
519 struct perf_event_context *ctx)
520{
521}
522
523void
524perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
525{
526}
527
528static inline void
529perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
530{
531}
532
533static inline u64 perf_cgroup_event_time(struct perf_event *event)
534{
535 return 0;
536}
537
538static inline void
539perf_cgroup_defer_enabled(struct perf_event *event)
540{
541}
542
543static inline void
544perf_cgroup_mark_enabled(struct perf_event *event,
545 struct perf_event_context *ctx)
546{
547}
548#endif
549
92void perf_pmu_disable(struct pmu *pmu) 550void perf_pmu_disable(struct pmu *pmu)
93{ 551{
94 int *count = this_cpu_ptr(pmu->pmu_disable_count); 552 int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -254,7 +712,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
254 raw_spin_lock_irqsave(&ctx->lock, flags); 712 raw_spin_lock_irqsave(&ctx->lock, flags);
255 --ctx->pin_count; 713 --ctx->pin_count;
256 raw_spin_unlock_irqrestore(&ctx->lock, flags); 714 raw_spin_unlock_irqrestore(&ctx->lock, flags);
257 put_ctx(ctx);
258} 715}
259 716
260/* 717/*
@@ -271,6 +728,10 @@ static void update_context_time(struct perf_event_context *ctx)
271static u64 perf_event_time(struct perf_event *event) 728static u64 perf_event_time(struct perf_event *event)
272{ 729{
273 struct perf_event_context *ctx = event->ctx; 730 struct perf_event_context *ctx = event->ctx;
731
732 if (is_cgroup_event(event))
733 return perf_cgroup_event_time(event);
734
274 return ctx ? ctx->time : 0; 735 return ctx ? ctx->time : 0;
275} 736}
276 737
@@ -285,9 +746,20 @@ static void update_event_times(struct perf_event *event)
285 if (event->state < PERF_EVENT_STATE_INACTIVE || 746 if (event->state < PERF_EVENT_STATE_INACTIVE ||
286 event->group_leader->state < PERF_EVENT_STATE_INACTIVE) 747 event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
287 return; 748 return;
288 749 /*
289 if (ctx->is_active) 750 * in cgroup mode, time_enabled represents
751 * the time the event was enabled AND active
752 * tasks were in the monitored cgroup. This is
753 * independent of the activity of the context as
754 * there may be a mix of cgroup and non-cgroup events.
755 *
756 * That is why we treat cgroup events differently
757 * here.
758 */
759 if (is_cgroup_event(event))
290 run_end = perf_event_time(event); 760 run_end = perf_event_time(event);
761 else if (ctx->is_active)
762 run_end = ctx->time;
291 else 763 else
292 run_end = event->tstamp_stopped; 764 run_end = event->tstamp_stopped;
293 765
@@ -299,6 +771,7 @@ static void update_event_times(struct perf_event *event)
299 run_end = perf_event_time(event); 771 run_end = perf_event_time(event);
300 772
301 event->total_time_running = run_end - event->tstamp_running; 773 event->total_time_running = run_end - event->tstamp_running;
774
302} 775}
303 776
304/* 777/*
@@ -347,6 +820,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
347 list_add_tail(&event->group_entry, list); 820 list_add_tail(&event->group_entry, list);
348 } 821 }
349 822
823 if (is_cgroup_event(event))
824 ctx->nr_cgroups++;
825
350 list_add_rcu(&event->event_entry, &ctx->event_list); 826 list_add_rcu(&event->event_entry, &ctx->event_list);
351 if (!ctx->nr_events) 827 if (!ctx->nr_events)
352 perf_pmu_rotate_start(ctx->pmu); 828 perf_pmu_rotate_start(ctx->pmu);
@@ -473,6 +949,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
473 949
474 event->attach_state &= ~PERF_ATTACH_CONTEXT; 950 event->attach_state &= ~PERF_ATTACH_CONTEXT;
475 951
952 if (is_cgroup_event(event))
953 ctx->nr_cgroups--;
954
476 ctx->nr_events--; 955 ctx->nr_events--;
477 if (event->attr.inherit_stat) 956 if (event->attr.inherit_stat)
478 ctx->nr_stat--; 957 ctx->nr_stat--;
@@ -544,7 +1023,8 @@ out:
544static inline int 1023static inline int
545event_filter_match(struct perf_event *event) 1024event_filter_match(struct perf_event *event)
546{ 1025{
547 return event->cpu == -1 || event->cpu == smp_processor_id(); 1026 return (event->cpu == -1 || event->cpu == smp_processor_id())
1027 && perf_cgroup_match(event);
548} 1028}
549 1029
550static void 1030static void
@@ -562,7 +1042,7 @@ event_sched_out(struct perf_event *event,
562 */ 1042 */
563 if (event->state == PERF_EVENT_STATE_INACTIVE 1043 if (event->state == PERF_EVENT_STATE_INACTIVE
564 && !event_filter_match(event)) { 1044 && !event_filter_match(event)) {
565 delta = ctx->time - event->tstamp_stopped; 1045 delta = tstamp - event->tstamp_stopped;
566 event->tstamp_running += delta; 1046 event->tstamp_running += delta;
567 event->tstamp_stopped = tstamp; 1047 event->tstamp_stopped = tstamp;
568 } 1048 }
@@ -606,47 +1086,30 @@ group_sched_out(struct perf_event *group_event,
606 cpuctx->exclusive = 0; 1086 cpuctx->exclusive = 0;
607} 1087}
608 1088
609static inline struct perf_cpu_context *
610__get_cpu_context(struct perf_event_context *ctx)
611{
612 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
613}
614
615/* 1089/*
616 * Cross CPU call to remove a performance event 1090 * Cross CPU call to remove a performance event
617 * 1091 *
618 * We disable the event on the hardware level first. After that we 1092 * We disable the event on the hardware level first. After that we
619 * remove it from the context list. 1093 * remove it from the context list.
620 */ 1094 */
621static void __perf_event_remove_from_context(void *info) 1095static int __perf_remove_from_context(void *info)
622{ 1096{
623 struct perf_event *event = info; 1097 struct perf_event *event = info;
624 struct perf_event_context *ctx = event->ctx; 1098 struct perf_event_context *ctx = event->ctx;
625 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1099 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
626 1100
627 /*
628 * If this is a task context, we need to check whether it is
629 * the current task context of this cpu. If not it has been
630 * scheduled out before the smp call arrived.
631 */
632 if (ctx->task && cpuctx->task_ctx != ctx)
633 return;
634
635 raw_spin_lock(&ctx->lock); 1101 raw_spin_lock(&ctx->lock);
636
637 event_sched_out(event, cpuctx, ctx); 1102 event_sched_out(event, cpuctx, ctx);
638
639 list_del_event(event, ctx); 1103 list_del_event(event, ctx);
640
641 raw_spin_unlock(&ctx->lock); 1104 raw_spin_unlock(&ctx->lock);
1105
1106 return 0;
642} 1107}
643 1108
644 1109
645/* 1110/*
646 * Remove the event from a task's (or a CPU's) list of events. 1111 * Remove the event from a task's (or a CPU's) list of events.
647 * 1112 *
648 * Must be called with ctx->mutex held.
649 *
650 * CPU events are removed with a smp call. For task events we only 1113 * CPU events are removed with a smp call. For task events we only
651 * call when the task is on a CPU. 1114 * call when the task is on a CPU.
652 * 1115 *
@@ -657,49 +1120,48 @@ static void __perf_event_remove_from_context(void *info)
657 * When called from perf_event_exit_task, it's OK because the 1120 * When called from perf_event_exit_task, it's OK because the
658 * context has been detached from its task. 1121 * context has been detached from its task.
659 */ 1122 */
660static void perf_event_remove_from_context(struct perf_event *event) 1123static void perf_remove_from_context(struct perf_event *event)
661{ 1124{
662 struct perf_event_context *ctx = event->ctx; 1125 struct perf_event_context *ctx = event->ctx;
663 struct task_struct *task = ctx->task; 1126 struct task_struct *task = ctx->task;
664 1127
1128 lockdep_assert_held(&ctx->mutex);
1129
665 if (!task) { 1130 if (!task) {
666 /* 1131 /*
667 * Per cpu events are removed via an smp call and 1132 * Per cpu events are removed via an smp call and
668 * the removal is always successful. 1133 * the removal is always successful.
669 */ 1134 */
670 smp_call_function_single(event->cpu, 1135 cpu_function_call(event->cpu, __perf_remove_from_context, event);
671 __perf_event_remove_from_context,
672 event, 1);
673 return; 1136 return;
674 } 1137 }
675 1138
676retry: 1139retry:
677 task_oncpu_function_call(task, __perf_event_remove_from_context, 1140 if (!task_function_call(task, __perf_remove_from_context, event))
678 event); 1141 return;
679 1142
680 raw_spin_lock_irq(&ctx->lock); 1143 raw_spin_lock_irq(&ctx->lock);
681 /* 1144 /*
682 * If the context is active we need to retry the smp call. 1145 * If we failed to find a running task, but find the context active now
1146 * that we've acquired the ctx->lock, retry.
683 */ 1147 */
684 if (ctx->nr_active && !list_empty(&event->group_entry)) { 1148 if (ctx->is_active) {
685 raw_spin_unlock_irq(&ctx->lock); 1149 raw_spin_unlock_irq(&ctx->lock);
686 goto retry; 1150 goto retry;
687 } 1151 }
688 1152
689 /* 1153 /*
690 * The lock prevents that this context is scheduled in so we 1154 * Since the task isn't running, its safe to remove the event, us
691 * can remove the event safely, if the call above did not 1155 * holding the ctx->lock ensures the task won't get scheduled in.
692 * succeed.
693 */ 1156 */
694 if (!list_empty(&event->group_entry)) 1157 list_del_event(event, ctx);
695 list_del_event(event, ctx);
696 raw_spin_unlock_irq(&ctx->lock); 1158 raw_spin_unlock_irq(&ctx->lock);
697} 1159}
698 1160
699/* 1161/*
700 * Cross CPU call to disable a performance event 1162 * Cross CPU call to disable a performance event
701 */ 1163 */
702static void __perf_event_disable(void *info) 1164static int __perf_event_disable(void *info)
703{ 1165{
704 struct perf_event *event = info; 1166 struct perf_event *event = info;
705 struct perf_event_context *ctx = event->ctx; 1167 struct perf_event_context *ctx = event->ctx;
@@ -708,9 +1170,12 @@ static void __perf_event_disable(void *info)
708 /* 1170 /*
709 * If this is a per-task event, need to check whether this 1171 * If this is a per-task event, need to check whether this
710 * event's task is the current task on this cpu. 1172 * event's task is the current task on this cpu.
1173 *
1174 * Can trigger due to concurrent perf_event_context_sched_out()
1175 * flipping contexts around.
711 */ 1176 */
712 if (ctx->task && cpuctx->task_ctx != ctx) 1177 if (ctx->task && cpuctx->task_ctx != ctx)
713 return; 1178 return -EINVAL;
714 1179
715 raw_spin_lock(&ctx->lock); 1180 raw_spin_lock(&ctx->lock);
716 1181
@@ -720,6 +1185,7 @@ static void __perf_event_disable(void *info)
720 */ 1185 */
721 if (event->state >= PERF_EVENT_STATE_INACTIVE) { 1186 if (event->state >= PERF_EVENT_STATE_INACTIVE) {
722 update_context_time(ctx); 1187 update_context_time(ctx);
1188 update_cgrp_time_from_event(event);
723 update_group_times(event); 1189 update_group_times(event);
724 if (event == event->group_leader) 1190 if (event == event->group_leader)
725 group_sched_out(event, cpuctx, ctx); 1191 group_sched_out(event, cpuctx, ctx);
@@ -729,6 +1195,8 @@ static void __perf_event_disable(void *info)
729 } 1195 }
730 1196
731 raw_spin_unlock(&ctx->lock); 1197 raw_spin_unlock(&ctx->lock);
1198
1199 return 0;
732} 1200}
733 1201
734/* 1202/*
@@ -753,13 +1221,13 @@ void perf_event_disable(struct perf_event *event)
753 /* 1221 /*
754 * Disable the event on the cpu that it's on 1222 * Disable the event on the cpu that it's on
755 */ 1223 */
756 smp_call_function_single(event->cpu, __perf_event_disable, 1224 cpu_function_call(event->cpu, __perf_event_disable, event);
757 event, 1);
758 return; 1225 return;
759 } 1226 }
760 1227
761retry: 1228retry:
762 task_oncpu_function_call(task, __perf_event_disable, event); 1229 if (!task_function_call(task, __perf_event_disable, event))
1230 return;
763 1231
764 raw_spin_lock_irq(&ctx->lock); 1232 raw_spin_lock_irq(&ctx->lock);
765 /* 1233 /*
@@ -767,6 +1235,11 @@ retry:
767 */ 1235 */
768 if (event->state == PERF_EVENT_STATE_ACTIVE) { 1236 if (event->state == PERF_EVENT_STATE_ACTIVE) {
769 raw_spin_unlock_irq(&ctx->lock); 1237 raw_spin_unlock_irq(&ctx->lock);
1238 /*
1239 * Reload the task pointer, it might have been changed by
1240 * a concurrent perf_event_context_sched_out().
1241 */
1242 task = ctx->task;
770 goto retry; 1243 goto retry;
771 } 1244 }
772 1245
@@ -778,10 +1251,48 @@ retry:
778 update_group_times(event); 1251 update_group_times(event);
779 event->state = PERF_EVENT_STATE_OFF; 1252 event->state = PERF_EVENT_STATE_OFF;
780 } 1253 }
781
782 raw_spin_unlock_irq(&ctx->lock); 1254 raw_spin_unlock_irq(&ctx->lock);
783} 1255}
784 1256
1257static void perf_set_shadow_time(struct perf_event *event,
1258 struct perf_event_context *ctx,
1259 u64 tstamp)
1260{
1261 /*
1262 * use the correct time source for the time snapshot
1263 *
1264 * We could get by without this by leveraging the
1265 * fact that to get to this function, the caller
1266 * has most likely already called update_context_time()
1267 * and update_cgrp_time_xx() and thus both timestamp
1268 * are identical (or very close). Given that tstamp is,
1269 * already adjusted for cgroup, we could say that:
1270 * tstamp - ctx->timestamp
1271 * is equivalent to
1272 * tstamp - cgrp->timestamp.
1273 *
1274 * Then, in perf_output_read(), the calculation would
1275 * work with no changes because:
1276 * - event is guaranteed scheduled in
1277 * - no scheduled out in between
1278 * - thus the timestamp would be the same
1279 *
1280 * But this is a bit hairy.
1281 *
1282 * So instead, we have an explicit cgroup call to remain
1283 * within the time time source all along. We believe it
1284 * is cleaner and simpler to understand.
1285 */
1286 if (is_cgroup_event(event))
1287 perf_cgroup_set_shadow_time(event, tstamp);
1288 else
1289 event->shadow_ctx_time = tstamp - ctx->timestamp;
1290}
1291
1292#define MAX_INTERRUPTS (~0ULL)
1293
1294static void perf_log_throttle(struct perf_event *event, int enable);
1295
785static int 1296static int
786event_sched_in(struct perf_event *event, 1297event_sched_in(struct perf_event *event,
787 struct perf_cpu_context *cpuctx, 1298 struct perf_cpu_context *cpuctx,
@@ -794,6 +1305,17 @@ event_sched_in(struct perf_event *event,
794 1305
795 event->state = PERF_EVENT_STATE_ACTIVE; 1306 event->state = PERF_EVENT_STATE_ACTIVE;
796 event->oncpu = smp_processor_id(); 1307 event->oncpu = smp_processor_id();
1308
1309 /*
1310 * Unthrottle events, since we scheduled we might have missed several
1311 * ticks already, also for a heavily scheduling task there is little
1312 * guarantee it'll get a tick in a timely manner.
1313 */
1314 if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
1315 perf_log_throttle(event, 1);
1316 event->hw.interrupts = 0;
1317 }
1318
797 /* 1319 /*
798 * The new state must be visible before we turn it on in the hardware: 1320 * The new state must be visible before we turn it on in the hardware:
799 */ 1321 */
@@ -807,7 +1329,7 @@ event_sched_in(struct perf_event *event,
807 1329
808 event->tstamp_running += tstamp - event->tstamp_stopped; 1330 event->tstamp_running += tstamp - event->tstamp_stopped;
809 1331
810 event->shadow_ctx_time = tstamp - ctx->timestamp; 1332 perf_set_shadow_time(event, ctx, tstamp);
811 1333
812 if (!is_software_event(event)) 1334 if (!is_software_event(event))
813 cpuctx->active_oncpu++; 1335 cpuctx->active_oncpu++;
@@ -928,12 +1450,15 @@ static void add_event_to_ctx(struct perf_event *event,
928 event->tstamp_stopped = tstamp; 1450 event->tstamp_stopped = tstamp;
929} 1451}
930 1452
1453static void perf_event_context_sched_in(struct perf_event_context *ctx,
1454 struct task_struct *tsk);
1455
931/* 1456/*
932 * Cross CPU call to install and enable a performance event 1457 * Cross CPU call to install and enable a performance event
933 * 1458 *
934 * Must be called with ctx->mutex held 1459 * Must be called with ctx->mutex held
935 */ 1460 */
936static void __perf_install_in_context(void *info) 1461static int __perf_install_in_context(void *info)
937{ 1462{
938 struct perf_event *event = info; 1463 struct perf_event *event = info;
939 struct perf_event_context *ctx = event->ctx; 1464 struct perf_event_context *ctx = event->ctx;
@@ -942,21 +1467,22 @@ static void __perf_install_in_context(void *info)
942 int err; 1467 int err;
943 1468
944 /* 1469 /*
945 * If this is a task context, we need to check whether it is 1470 * In case we're installing a new context to an already running task,
946 * the current task context of this cpu. If not it has been 1471 * could also happen before perf_event_task_sched_in() on architectures
947 * scheduled out before the smp call arrived. 1472 * which do context switches with IRQs enabled.
948 * Or possibly this is the right context but it isn't
949 * on this cpu because it had no events.
950 */ 1473 */
951 if (ctx->task && cpuctx->task_ctx != ctx) { 1474 if (ctx->task && !cpuctx->task_ctx)
952 if (cpuctx->task_ctx || ctx->task != current) 1475 perf_event_context_sched_in(ctx, ctx->task);
953 return;
954 cpuctx->task_ctx = ctx;
955 }
956 1476
957 raw_spin_lock(&ctx->lock); 1477 raw_spin_lock(&ctx->lock);
958 ctx->is_active = 1; 1478 ctx->is_active = 1;
959 update_context_time(ctx); 1479 update_context_time(ctx);
1480 /*
1481 * update cgrp time only if current cgrp
1482 * matches event->cgrp. Must be done before
1483 * calling add_event_to_ctx()
1484 */
1485 update_cgrp_time_from_event(event);
960 1486
961 add_event_to_ctx(event, ctx); 1487 add_event_to_ctx(event, ctx);
962 1488
@@ -997,6 +1523,8 @@ static void __perf_install_in_context(void *info)
997 1523
998unlock: 1524unlock:
999 raw_spin_unlock(&ctx->lock); 1525 raw_spin_unlock(&ctx->lock);
1526
1527 return 0;
1000} 1528}
1001 1529
1002/* 1530/*
@@ -1008,8 +1536,6 @@ unlock:
1008 * If the event is attached to a task which is on a CPU we use a smp 1536 * If the event is attached to a task which is on a CPU we use a smp
1009 * call to enable it in the task context. The task might have been 1537 * call to enable it in the task context. The task might have been
1010 * scheduled away, but we check this in the smp call again. 1538 * scheduled away, but we check this in the smp call again.
1011 *
1012 * Must be called with ctx->mutex held.
1013 */ 1539 */
1014static void 1540static void
1015perf_install_in_context(struct perf_event_context *ctx, 1541perf_install_in_context(struct perf_event_context *ctx,
@@ -1018,6 +1544,8 @@ perf_install_in_context(struct perf_event_context *ctx,
1018{ 1544{
1019 struct task_struct *task = ctx->task; 1545 struct task_struct *task = ctx->task;
1020 1546
1547 lockdep_assert_held(&ctx->mutex);
1548
1021 event->ctx = ctx; 1549 event->ctx = ctx;
1022 1550
1023 if (!task) { 1551 if (!task) {
@@ -1025,31 +1553,29 @@ perf_install_in_context(struct perf_event_context *ctx,
1025 * Per cpu events are installed via an smp call and 1553 * Per cpu events are installed via an smp call and
1026 * the install is always successful. 1554 * the install is always successful.
1027 */ 1555 */
1028 smp_call_function_single(cpu, __perf_install_in_context, 1556 cpu_function_call(cpu, __perf_install_in_context, event);
1029 event, 1);
1030 return; 1557 return;
1031 } 1558 }
1032 1559
1033retry: 1560retry:
1034 task_oncpu_function_call(task, __perf_install_in_context, 1561 if (!task_function_call(task, __perf_install_in_context, event))
1035 event); 1562 return;
1036 1563
1037 raw_spin_lock_irq(&ctx->lock); 1564 raw_spin_lock_irq(&ctx->lock);
1038 /* 1565 /*
1039 * we need to retry the smp call. 1566 * If we failed to find a running task, but find the context active now
1567 * that we've acquired the ctx->lock, retry.
1040 */ 1568 */
1041 if (ctx->is_active && list_empty(&event->group_entry)) { 1569 if (ctx->is_active) {
1042 raw_spin_unlock_irq(&ctx->lock); 1570 raw_spin_unlock_irq(&ctx->lock);
1043 goto retry; 1571 goto retry;
1044 } 1572 }
1045 1573
1046 /* 1574 /*
1047 * The lock prevents that this context is scheduled in so we 1575 * Since the task isn't running, its safe to add the event, us holding
1048 * can add the event safely, if it the call above did not 1576 * the ctx->lock ensures the task won't get scheduled in.
1049 * succeed.
1050 */ 1577 */
1051 if (list_empty(&event->group_entry)) 1578 add_event_to_ctx(event, ctx);
1052 add_event_to_ctx(event, ctx);
1053 raw_spin_unlock_irq(&ctx->lock); 1579 raw_spin_unlock_irq(&ctx->lock);
1054} 1580}
1055 1581
@@ -1078,7 +1604,7 @@ static void __perf_event_mark_enabled(struct perf_event *event,
1078/* 1604/*
1079 * Cross CPU call to enable a performance event 1605 * Cross CPU call to enable a performance event
1080 */ 1606 */
1081static void __perf_event_enable(void *info) 1607static int __perf_event_enable(void *info)
1082{ 1608{
1083 struct perf_event *event = info; 1609 struct perf_event *event = info;
1084 struct perf_event_context *ctx = event->ctx; 1610 struct perf_event_context *ctx = event->ctx;
@@ -1086,26 +1612,27 @@ static void __perf_event_enable(void *info)
1086 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1612 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1087 int err; 1613 int err;
1088 1614
1089 /* 1615 if (WARN_ON_ONCE(!ctx->is_active))
1090 * If this is a per-task event, need to check whether this 1616 return -EINVAL;
1091 * event's task is the current task on this cpu.
1092 */
1093 if (ctx->task && cpuctx->task_ctx != ctx) {
1094 if (cpuctx->task_ctx || ctx->task != current)
1095 return;
1096 cpuctx->task_ctx = ctx;
1097 }
1098 1617
1099 raw_spin_lock(&ctx->lock); 1618 raw_spin_lock(&ctx->lock);
1100 ctx->is_active = 1;
1101 update_context_time(ctx); 1619 update_context_time(ctx);
1102 1620
1103 if (event->state >= PERF_EVENT_STATE_INACTIVE) 1621 if (event->state >= PERF_EVENT_STATE_INACTIVE)
1104 goto unlock; 1622 goto unlock;
1623
1624 /*
1625 * set current task's cgroup time reference point
1626 */
1627 perf_cgroup_set_timestamp(current, ctx);
1628
1105 __perf_event_mark_enabled(event, ctx); 1629 __perf_event_mark_enabled(event, ctx);
1106 1630
1107 if (!event_filter_match(event)) 1631 if (!event_filter_match(event)) {
1632 if (is_cgroup_event(event))
1633 perf_cgroup_defer_enabled(event);
1108 goto unlock; 1634 goto unlock;
1635 }
1109 1636
1110 /* 1637 /*
1111 * If the event is in a group and isn't the group leader, 1638 * If the event is in a group and isn't the group leader,
@@ -1138,6 +1665,8 @@ static void __perf_event_enable(void *info)
1138 1665
1139unlock: 1666unlock:
1140 raw_spin_unlock(&ctx->lock); 1667 raw_spin_unlock(&ctx->lock);
1668
1669 return 0;
1141} 1670}
1142 1671
1143/* 1672/*
@@ -1158,8 +1687,7 @@ void perf_event_enable(struct perf_event *event)
1158 /* 1687 /*
1159 * Enable the event on the cpu that it's on 1688 * Enable the event on the cpu that it's on
1160 */ 1689 */
1161 smp_call_function_single(event->cpu, __perf_event_enable, 1690 cpu_function_call(event->cpu, __perf_event_enable, event);
1162 event, 1);
1163 return; 1691 return;
1164 } 1692 }
1165 1693
@@ -1178,8 +1706,15 @@ void perf_event_enable(struct perf_event *event)
1178 event->state = PERF_EVENT_STATE_OFF; 1706 event->state = PERF_EVENT_STATE_OFF;
1179 1707
1180retry: 1708retry:
1709 if (!ctx->is_active) {
1710 __perf_event_mark_enabled(event, ctx);
1711 goto out;
1712 }
1713
1181 raw_spin_unlock_irq(&ctx->lock); 1714 raw_spin_unlock_irq(&ctx->lock);
1182 task_oncpu_function_call(task, __perf_event_enable, event); 1715
1716 if (!task_function_call(task, __perf_event_enable, event))
1717 return;
1183 1718
1184 raw_spin_lock_irq(&ctx->lock); 1719 raw_spin_lock_irq(&ctx->lock);
1185 1720
@@ -1187,15 +1722,14 @@ retry:
1187 * If the context is active and the event is still off, 1722 * If the context is active and the event is still off,
1188 * we need to retry the cross-call. 1723 * we need to retry the cross-call.
1189 */ 1724 */
1190 if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) 1725 if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
1726 /*
1727 * task could have been flipped by a concurrent
1728 * perf_event_context_sched_out()
1729 */
1730 task = ctx->task;
1191 goto retry; 1731 goto retry;
1192 1732 }
1193 /*
1194 * Since we have the lock this context can't be scheduled
1195 * in, so we can change the state safely.
1196 */
1197 if (event->state == PERF_EVENT_STATE_OFF)
1198 __perf_event_mark_enabled(event, ctx);
1199 1733
1200out: 1734out:
1201 raw_spin_unlock_irq(&ctx->lock); 1735 raw_spin_unlock_irq(&ctx->lock);
@@ -1227,6 +1761,7 @@ static void ctx_sched_out(struct perf_event_context *ctx,
1227 if (likely(!ctx->nr_events)) 1761 if (likely(!ctx->nr_events))
1228 goto out; 1762 goto out;
1229 update_context_time(ctx); 1763 update_context_time(ctx);
1764 update_cgrp_time_from_cpuctx(cpuctx);
1230 1765
1231 if (!ctx->nr_active) 1766 if (!ctx->nr_active)
1232 goto out; 1767 goto out;
@@ -1339,8 +1874,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
1339 } 1874 }
1340} 1875}
1341 1876
1342void perf_event_context_sched_out(struct task_struct *task, int ctxn, 1877static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
1343 struct task_struct *next) 1878 struct task_struct *next)
1344{ 1879{
1345 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; 1880 struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
1346 struct perf_event_context *next_ctx; 1881 struct perf_event_context *next_ctx;
@@ -1416,6 +1951,14 @@ void __perf_event_task_sched_out(struct task_struct *task,
1416 1951
1417 for_each_task_context_nr(ctxn) 1952 for_each_task_context_nr(ctxn)
1418 perf_event_context_sched_out(task, ctxn, next); 1953 perf_event_context_sched_out(task, ctxn, next);
1954
1955 /*
1956 * if cgroup events exist on this CPU, then we need
1957 * to check if we have to switch out PMU state.
1958 * cgroup event are system-wide mode only
1959 */
1960 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
1961 perf_cgroup_sched_out(task);
1419} 1962}
1420 1963
1421static void task_ctx_sched_out(struct perf_event_context *ctx, 1964static void task_ctx_sched_out(struct perf_event_context *ctx,
@@ -1454,6 +1997,10 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
1454 if (!event_filter_match(event)) 1997 if (!event_filter_match(event))
1455 continue; 1998 continue;
1456 1999
2000 /* may need to reset tstamp_enabled */
2001 if (is_cgroup_event(event))
2002 perf_cgroup_mark_enabled(event, ctx);
2003
1457 if (group_can_go_on(event, cpuctx, 1)) 2004 if (group_can_go_on(event, cpuctx, 1))
1458 group_sched_in(event, cpuctx, ctx); 2005 group_sched_in(event, cpuctx, ctx);
1459 2006
@@ -1486,6 +2033,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
1486 if (!event_filter_match(event)) 2033 if (!event_filter_match(event))
1487 continue; 2034 continue;
1488 2035
2036 /* may need to reset tstamp_enabled */
2037 if (is_cgroup_event(event))
2038 perf_cgroup_mark_enabled(event, ctx);
2039
1489 if (group_can_go_on(event, cpuctx, can_add_hw)) { 2040 if (group_can_go_on(event, cpuctx, can_add_hw)) {
1490 if (group_sched_in(event, cpuctx, ctx)) 2041 if (group_sched_in(event, cpuctx, ctx))
1491 can_add_hw = 0; 2042 can_add_hw = 0;
@@ -1496,15 +2047,19 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
1496static void 2047static void
1497ctx_sched_in(struct perf_event_context *ctx, 2048ctx_sched_in(struct perf_event_context *ctx,
1498 struct perf_cpu_context *cpuctx, 2049 struct perf_cpu_context *cpuctx,
1499 enum event_type_t event_type) 2050 enum event_type_t event_type,
2051 struct task_struct *task)
1500{ 2052{
2053 u64 now;
2054
1501 raw_spin_lock(&ctx->lock); 2055 raw_spin_lock(&ctx->lock);
1502 ctx->is_active = 1; 2056 ctx->is_active = 1;
1503 if (likely(!ctx->nr_events)) 2057 if (likely(!ctx->nr_events))
1504 goto out; 2058 goto out;
1505 2059
1506 ctx->timestamp = perf_clock(); 2060 now = perf_clock();
1507 2061 ctx->timestamp = now;
2062 perf_cgroup_set_timestamp(task, ctx);
1508 /* 2063 /*
1509 * First go through the list and put on any pinned groups 2064 * First go through the list and put on any pinned groups
1510 * in order to give them the best chance of going on. 2065 * in order to give them the best chance of going on.
@@ -1521,11 +2076,12 @@ out:
1521} 2076}
1522 2077
1523static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, 2078static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
1524 enum event_type_t event_type) 2079 enum event_type_t event_type,
2080 struct task_struct *task)
1525{ 2081{
1526 struct perf_event_context *ctx = &cpuctx->ctx; 2082 struct perf_event_context *ctx = &cpuctx->ctx;
1527 2083
1528 ctx_sched_in(ctx, cpuctx, event_type); 2084 ctx_sched_in(ctx, cpuctx, event_type, task);
1529} 2085}
1530 2086
1531static void task_ctx_sched_in(struct perf_event_context *ctx, 2087static void task_ctx_sched_in(struct perf_event_context *ctx,
@@ -1533,15 +2089,16 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
1533{ 2089{
1534 struct perf_cpu_context *cpuctx; 2090 struct perf_cpu_context *cpuctx;
1535 2091
1536 cpuctx = __get_cpu_context(ctx); 2092 cpuctx = __get_cpu_context(ctx);
1537 if (cpuctx->task_ctx == ctx) 2093 if (cpuctx->task_ctx == ctx)
1538 return; 2094 return;
1539 2095
1540 ctx_sched_in(ctx, cpuctx, event_type); 2096 ctx_sched_in(ctx, cpuctx, event_type, NULL);
1541 cpuctx->task_ctx = ctx; 2097 cpuctx->task_ctx = ctx;
1542} 2098}
1543 2099
1544void perf_event_context_sched_in(struct perf_event_context *ctx) 2100static void perf_event_context_sched_in(struct perf_event_context *ctx,
2101 struct task_struct *task)
1545{ 2102{
1546 struct perf_cpu_context *cpuctx; 2103 struct perf_cpu_context *cpuctx;
1547 2104
@@ -1557,9 +2114,9 @@ void perf_event_context_sched_in(struct perf_event_context *ctx)
1557 */ 2114 */
1558 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 2115 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1559 2116
1560 ctx_sched_in(ctx, cpuctx, EVENT_PINNED); 2117 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
1561 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 2118 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
1562 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); 2119 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
1563 2120
1564 cpuctx->task_ctx = ctx; 2121 cpuctx->task_ctx = ctx;
1565 2122
@@ -1592,14 +2149,17 @@ void __perf_event_task_sched_in(struct task_struct *task)
1592 if (likely(!ctx)) 2149 if (likely(!ctx))
1593 continue; 2150 continue;
1594 2151
1595 perf_event_context_sched_in(ctx); 2152 perf_event_context_sched_in(ctx, task);
1596 } 2153 }
2154 /*
2155 * if cgroup events exist on this CPU, then we need
2156 * to check if we have to switch in PMU state.
2157 * cgroup event are system-wide mode only
2158 */
2159 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
2160 perf_cgroup_sched_in(task);
1597} 2161}
1598 2162
1599#define MAX_INTERRUPTS (~0ULL)
1600
1601static void perf_log_throttle(struct perf_event *event, int enable);
1602
1603static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) 2163static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
1604{ 2164{
1605 u64 frequency = event->attr.sample_freq; 2165 u64 frequency = event->attr.sample_freq;
@@ -1627,7 +2187,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
1627 * Reduce accuracy by one bit such that @a and @b converge 2187 * Reduce accuracy by one bit such that @a and @b converge
1628 * to a similar magnitude. 2188 * to a similar magnitude.
1629 */ 2189 */
1630#define REDUCE_FLS(a, b) \ 2190#define REDUCE_FLS(a, b) \
1631do { \ 2191do { \
1632 if (a##_fls > b##_fls) { \ 2192 if (a##_fls > b##_fls) { \
1633 a >>= 1; \ 2193 a >>= 1; \
@@ -1797,7 +2357,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
1797 if (ctx) 2357 if (ctx)
1798 rotate_ctx(ctx); 2358 rotate_ctx(ctx);
1799 2359
1800 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 2360 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current);
1801 if (ctx) 2361 if (ctx)
1802 task_ctx_sched_in(ctx, EVENT_FLEXIBLE); 2362 task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
1803 2363
@@ -1876,7 +2436,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
1876 2436
1877 raw_spin_unlock(&ctx->lock); 2437 raw_spin_unlock(&ctx->lock);
1878 2438
1879 perf_event_context_sched_in(ctx); 2439 perf_event_context_sched_in(ctx, ctx->task);
1880out: 2440out:
1881 local_irq_restore(flags); 2441 local_irq_restore(flags);
1882} 2442}
@@ -1901,8 +2461,10 @@ static void __perf_event_read(void *info)
1901 return; 2461 return;
1902 2462
1903 raw_spin_lock(&ctx->lock); 2463 raw_spin_lock(&ctx->lock);
1904 if (ctx->is_active) 2464 if (ctx->is_active) {
1905 update_context_time(ctx); 2465 update_context_time(ctx);
2466 update_cgrp_time_from_event(event);
2467 }
1906 update_event_times(event); 2468 update_event_times(event);
1907 if (event->state == PERF_EVENT_STATE_ACTIVE) 2469 if (event->state == PERF_EVENT_STATE_ACTIVE)
1908 event->pmu->read(event); 2470 event->pmu->read(event);
@@ -1933,8 +2495,10 @@ static u64 perf_event_read(struct perf_event *event)
1933 * (e.g., thread is blocked), in that case 2495 * (e.g., thread is blocked), in that case
1934 * we cannot update context time 2496 * we cannot update context time
1935 */ 2497 */
1936 if (ctx->is_active) 2498 if (ctx->is_active) {
1937 update_context_time(ctx); 2499 update_context_time(ctx);
2500 update_cgrp_time_from_event(event);
2501 }
1938 update_event_times(event); 2502 update_event_times(event);
1939 raw_spin_unlock_irqrestore(&ctx->lock, flags); 2503 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1940 } 2504 }
@@ -2213,6 +2777,9 @@ errout:
2213 2777
2214} 2778}
2215 2779
2780/*
2781 * Returns a matching context with refcount and pincount.
2782 */
2216static struct perf_event_context * 2783static struct perf_event_context *
2217find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) 2784find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2218{ 2785{
@@ -2237,6 +2804,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2237 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 2804 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
2238 ctx = &cpuctx->ctx; 2805 ctx = &cpuctx->ctx;
2239 get_ctx(ctx); 2806 get_ctx(ctx);
2807 ++ctx->pin_count;
2240 2808
2241 return ctx; 2809 return ctx;
2242 } 2810 }
@@ -2250,6 +2818,7 @@ retry:
2250 ctx = perf_lock_task_context(task, ctxn, &flags); 2818 ctx = perf_lock_task_context(task, ctxn, &flags);
2251 if (ctx) { 2819 if (ctx) {
2252 unclone_ctx(ctx); 2820 unclone_ctx(ctx);
2821 ++ctx->pin_count;
2253 raw_spin_unlock_irqrestore(&ctx->lock, flags); 2822 raw_spin_unlock_irqrestore(&ctx->lock, flags);
2254 } 2823 }
2255 2824
@@ -2271,8 +2840,10 @@ retry:
2271 err = -ESRCH; 2840 err = -ESRCH;
2272 else if (task->perf_event_ctxp[ctxn]) 2841 else if (task->perf_event_ctxp[ctxn])
2273 err = -EAGAIN; 2842 err = -EAGAIN;
2274 else 2843 else {
2844 ++ctx->pin_count;
2275 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); 2845 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
2846 }
2276 mutex_unlock(&task->perf_event_mutex); 2847 mutex_unlock(&task->perf_event_mutex);
2277 2848
2278 if (unlikely(err)) { 2849 if (unlikely(err)) {
@@ -2312,7 +2883,7 @@ static void free_event(struct perf_event *event)
2312 2883
2313 if (!event->parent) { 2884 if (!event->parent) {
2314 if (event->attach_state & PERF_ATTACH_TASK) 2885 if (event->attach_state & PERF_ATTACH_TASK)
2315 jump_label_dec(&perf_task_events); 2886 jump_label_dec(&perf_sched_events);
2316 if (event->attr.mmap || event->attr.mmap_data) 2887 if (event->attr.mmap || event->attr.mmap_data)
2317 atomic_dec(&nr_mmap_events); 2888 atomic_dec(&nr_mmap_events);
2318 if (event->attr.comm) 2889 if (event->attr.comm)
@@ -2321,6 +2892,10 @@ static void free_event(struct perf_event *event)
2321 atomic_dec(&nr_task_events); 2892 atomic_dec(&nr_task_events);
2322 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) 2893 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
2323 put_callchain_buffers(); 2894 put_callchain_buffers();
2895 if (is_cgroup_event(event)) {
2896 atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
2897 jump_label_dec(&perf_sched_events);
2898 }
2324 } 2899 }
2325 2900
2326 if (event->buffer) { 2901 if (event->buffer) {
@@ -2328,6 +2903,9 @@ static void free_event(struct perf_event *event)
2328 event->buffer = NULL; 2903 event->buffer = NULL;
2329 } 2904 }
2330 2905
2906 if (is_cgroup_event(event))
2907 perf_detach_cgroup(event);
2908
2331 if (event->destroy) 2909 if (event->destroy)
2332 event->destroy(event); 2910 event->destroy(event);
2333 2911
@@ -4395,26 +4973,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
4395 if (unlikely(!is_sampling_event(event))) 4973 if (unlikely(!is_sampling_event(event)))
4396 return 0; 4974 return 0;
4397 4975
4398 if (!throttle) { 4976 if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
4399 hwc->interrupts++; 4977 if (throttle) {
4400 } else { 4978 hwc->interrupts = MAX_INTERRUPTS;
4401 if (hwc->interrupts != MAX_INTERRUPTS) { 4979 perf_log_throttle(event, 0);
4402 hwc->interrupts++;
4403 if (HZ * hwc->interrupts >
4404 (u64)sysctl_perf_event_sample_rate) {
4405 hwc->interrupts = MAX_INTERRUPTS;
4406 perf_log_throttle(event, 0);
4407 ret = 1;
4408 }
4409 } else {
4410 /*
4411 * Keep re-disabling events even though on the previous
4412 * pass we disabled it - just in case we raced with a
4413 * sched-in and the event got enabled again:
4414 */
4415 ret = 1; 4980 ret = 1;
4416 } 4981 }
4417 } 4982 } else
4983 hwc->interrupts++;
4418 4984
4419 if (event->attr.freq) { 4985 if (event->attr.freq) {
4420 u64 now = perf_clock(); 4986 u64 now = perf_clock();
@@ -5051,6 +5617,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
5051 u64 period; 5617 u64 period;
5052 5618
5053 event = container_of(hrtimer, struct perf_event, hw.hrtimer); 5619 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
5620
5621 if (event->state != PERF_EVENT_STATE_ACTIVE)
5622 return HRTIMER_NORESTART;
5623
5054 event->pmu->read(event); 5624 event->pmu->read(event);
5055 5625
5056 perf_sample_data_init(&data, 0); 5626 perf_sample_data_init(&data, 0);
@@ -5077,9 +5647,6 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
5077 if (!is_sampling_event(event)) 5647 if (!is_sampling_event(event))
5078 return; 5648 return;
5079 5649
5080 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
5081 hwc->hrtimer.function = perf_swevent_hrtimer;
5082
5083 period = local64_read(&hwc->period_left); 5650 period = local64_read(&hwc->period_left);
5084 if (period) { 5651 if (period) {
5085 if (period < 0) 5652 if (period < 0)
@@ -5106,6 +5673,30 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
5106 } 5673 }
5107} 5674}
5108 5675
5676static void perf_swevent_init_hrtimer(struct perf_event *event)
5677{
5678 struct hw_perf_event *hwc = &event->hw;
5679
5680 if (!is_sampling_event(event))
5681 return;
5682
5683 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
5684 hwc->hrtimer.function = perf_swevent_hrtimer;
5685
5686 /*
5687 * Since hrtimers have a fixed rate, we can do a static freq->period
5688 * mapping and avoid the whole period adjust feedback stuff.
5689 */
5690 if (event->attr.freq) {
5691 long freq = event->attr.sample_freq;
5692
5693 event->attr.sample_period = NSEC_PER_SEC / freq;
5694 hwc->sample_period = event->attr.sample_period;
5695 local64_set(&hwc->period_left, hwc->sample_period);
5696 event->attr.freq = 0;
5697 }
5698}
5699
5109/* 5700/*
5110 * Software event: cpu wall time clock 5701 * Software event: cpu wall time clock
5111 */ 5702 */
@@ -5158,6 +5749,8 @@ static int cpu_clock_event_init(struct perf_event *event)
5158 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) 5749 if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
5159 return -ENOENT; 5750 return -ENOENT;
5160 5751
5752 perf_swevent_init_hrtimer(event);
5753
5161 return 0; 5754 return 0;
5162} 5755}
5163 5756
@@ -5213,16 +5806,9 @@ static void task_clock_event_del(struct perf_event *event, int flags)
5213 5806
5214static void task_clock_event_read(struct perf_event *event) 5807static void task_clock_event_read(struct perf_event *event)
5215{ 5808{
5216 u64 time; 5809 u64 now = perf_clock();
5217 5810 u64 delta = now - event->ctx->timestamp;
5218 if (!in_nmi()) { 5811 u64 time = event->ctx->time + delta;
5219 update_context_time(event->ctx);
5220 time = event->ctx->time;
5221 } else {
5222 u64 now = perf_clock();
5223 u64 delta = now - event->ctx->timestamp;
5224 time = event->ctx->time + delta;
5225 }
5226 5812
5227 task_clock_event_update(event, time); 5813 task_clock_event_update(event, time);
5228} 5814}
@@ -5235,6 +5821,8 @@ static int task_clock_event_init(struct perf_event *event)
5235 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) 5821 if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
5236 return -ENOENT; 5822 return -ENOENT;
5237 5823
5824 perf_swevent_init_hrtimer(event);
5825
5238 return 0; 5826 return 0;
5239} 5827}
5240 5828
@@ -5506,17 +6094,22 @@ struct pmu *perf_init_event(struct perf_event *event)
5506{ 6094{
5507 struct pmu *pmu = NULL; 6095 struct pmu *pmu = NULL;
5508 int idx; 6096 int idx;
6097 int ret;
5509 6098
5510 idx = srcu_read_lock(&pmus_srcu); 6099 idx = srcu_read_lock(&pmus_srcu);
5511 6100
5512 rcu_read_lock(); 6101 rcu_read_lock();
5513 pmu = idr_find(&pmu_idr, event->attr.type); 6102 pmu = idr_find(&pmu_idr, event->attr.type);
5514 rcu_read_unlock(); 6103 rcu_read_unlock();
5515 if (pmu) 6104 if (pmu) {
6105 ret = pmu->event_init(event);
6106 if (ret)
6107 pmu = ERR_PTR(ret);
5516 goto unlock; 6108 goto unlock;
6109 }
5517 6110
5518 list_for_each_entry_rcu(pmu, &pmus, entry) { 6111 list_for_each_entry_rcu(pmu, &pmus, entry) {
5519 int ret = pmu->event_init(event); 6112 ret = pmu->event_init(event);
5520 if (!ret) 6113 if (!ret)
5521 goto unlock; 6114 goto unlock;
5522 6115
@@ -5642,7 +6235,7 @@ done:
5642 6235
5643 if (!event->parent) { 6236 if (!event->parent) {
5644 if (event->attach_state & PERF_ATTACH_TASK) 6237 if (event->attach_state & PERF_ATTACH_TASK)
5645 jump_label_inc(&perf_task_events); 6238 jump_label_inc(&perf_sched_events);
5646 if (event->attr.mmap || event->attr.mmap_data) 6239 if (event->attr.mmap || event->attr.mmap_data)
5647 atomic_inc(&nr_mmap_events); 6240 atomic_inc(&nr_mmap_events);
5648 if (event->attr.comm) 6241 if (event->attr.comm)
@@ -5817,7 +6410,7 @@ SYSCALL_DEFINE5(perf_event_open,
5817 int err; 6410 int err;
5818 6411
5819 /* for future expandability... */ 6412 /* for future expandability... */
5820 if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) 6413 if (flags & ~PERF_FLAG_ALL)
5821 return -EINVAL; 6414 return -EINVAL;
5822 6415
5823 err = perf_copy_attr(attr_uptr, &attr); 6416 err = perf_copy_attr(attr_uptr, &attr);
@@ -5834,6 +6427,15 @@ SYSCALL_DEFINE5(perf_event_open,
5834 return -EINVAL; 6427 return -EINVAL;
5835 } 6428 }
5836 6429
6430 /*
6431 * In cgroup mode, the pid argument is used to pass the fd
6432 * opened to the cgroup directory in cgroupfs. The cpu argument
6433 * designates the cpu on which to monitor threads from that
6434 * cgroup.
6435 */
6436 if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1))
6437 return -EINVAL;
6438
5837 event_fd = get_unused_fd_flags(O_RDWR); 6439 event_fd = get_unused_fd_flags(O_RDWR);
5838 if (event_fd < 0) 6440 if (event_fd < 0)
5839 return event_fd; 6441 return event_fd;
@@ -5851,7 +6453,7 @@ SYSCALL_DEFINE5(perf_event_open,
5851 group_leader = NULL; 6453 group_leader = NULL;
5852 } 6454 }
5853 6455
5854 if (pid != -1) { 6456 if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
5855 task = find_lively_task_by_vpid(pid); 6457 task = find_lively_task_by_vpid(pid);
5856 if (IS_ERR(task)) { 6458 if (IS_ERR(task)) {
5857 err = PTR_ERR(task); 6459 err = PTR_ERR(task);
@@ -5865,6 +6467,19 @@ SYSCALL_DEFINE5(perf_event_open,
5865 goto err_task; 6467 goto err_task;
5866 } 6468 }
5867 6469
6470 if (flags & PERF_FLAG_PID_CGROUP) {
6471 err = perf_cgroup_connect(pid, event, &attr, group_leader);
6472 if (err)
6473 goto err_alloc;
6474 /*
6475 * one more event:
6476 * - that has cgroup constraint on event->cpu
6477 * - that may need work on context switch
6478 */
6479 atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
6480 jump_label_inc(&perf_sched_events);
6481 }
6482
5868 /* 6483 /*
5869 * Special case software events and allow them to be part of 6484 * Special case software events and allow them to be part of
5870 * any hardware group. 6485 * any hardware group.
@@ -5950,10 +6565,10 @@ SYSCALL_DEFINE5(perf_event_open,
5950 struct perf_event_context *gctx = group_leader->ctx; 6565 struct perf_event_context *gctx = group_leader->ctx;
5951 6566
5952 mutex_lock(&gctx->mutex); 6567 mutex_lock(&gctx->mutex);
5953 perf_event_remove_from_context(group_leader); 6568 perf_remove_from_context(group_leader);
5954 list_for_each_entry(sibling, &group_leader->sibling_list, 6569 list_for_each_entry(sibling, &group_leader->sibling_list,
5955 group_entry) { 6570 group_entry) {
5956 perf_event_remove_from_context(sibling); 6571 perf_remove_from_context(sibling);
5957 put_ctx(gctx); 6572 put_ctx(gctx);
5958 } 6573 }
5959 mutex_unlock(&gctx->mutex); 6574 mutex_unlock(&gctx->mutex);
@@ -5976,6 +6591,7 @@ SYSCALL_DEFINE5(perf_event_open,
5976 6591
5977 perf_install_in_context(ctx, event, cpu); 6592 perf_install_in_context(ctx, event, cpu);
5978 ++ctx->generation; 6593 ++ctx->generation;
6594 perf_unpin_context(ctx);
5979 mutex_unlock(&ctx->mutex); 6595 mutex_unlock(&ctx->mutex);
5980 6596
5981 event->owner = current; 6597 event->owner = current;
@@ -6001,6 +6617,7 @@ SYSCALL_DEFINE5(perf_event_open,
6001 return event_fd; 6617 return event_fd;
6002 6618
6003err_context: 6619err_context:
6620 perf_unpin_context(ctx);
6004 put_ctx(ctx); 6621 put_ctx(ctx);
6005err_alloc: 6622err_alloc:
6006 free_event(event); 6623 free_event(event);
@@ -6051,6 +6668,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
6051 mutex_lock(&ctx->mutex); 6668 mutex_lock(&ctx->mutex);
6052 perf_install_in_context(ctx, event, cpu); 6669 perf_install_in_context(ctx, event, cpu);
6053 ++ctx->generation; 6670 ++ctx->generation;
6671 perf_unpin_context(ctx);
6054 mutex_unlock(&ctx->mutex); 6672 mutex_unlock(&ctx->mutex);
6055 6673
6056 return event; 6674 return event;
@@ -6104,7 +6722,7 @@ __perf_event_exit_task(struct perf_event *child_event,
6104{ 6722{
6105 struct perf_event *parent_event; 6723 struct perf_event *parent_event;
6106 6724
6107 perf_event_remove_from_context(child_event); 6725 perf_remove_from_context(child_event);
6108 6726
6109 parent_event = child_event->parent; 6727 parent_event = child_event->parent;
6110 /* 6728 /*
@@ -6411,7 +7029,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
6411 return 0; 7029 return 0;
6412 } 7030 }
6413 7031
6414 child_ctx = child->perf_event_ctxp[ctxn]; 7032 child_ctx = child->perf_event_ctxp[ctxn];
6415 if (!child_ctx) { 7033 if (!child_ctx) {
6416 /* 7034 /*
6417 * This is executed from the parent task context, so 7035 * This is executed from the parent task context, so
@@ -6526,6 +7144,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6526 mutex_unlock(&parent_ctx->mutex); 7144 mutex_unlock(&parent_ctx->mutex);
6527 7145
6528 perf_unpin_context(parent_ctx); 7146 perf_unpin_context(parent_ctx);
7147 put_ctx(parent_ctx);
6529 7148
6530 return ret; 7149 return ret;
6531} 7150}
@@ -6595,9 +7214,9 @@ static void __perf_event_exit_context(void *__info)
6595 perf_pmu_rotate_stop(ctx->pmu); 7214 perf_pmu_rotate_stop(ctx->pmu);
6596 7215
6597 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) 7216 list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
6598 __perf_event_remove_from_context(event); 7217 __perf_remove_from_context(event);
6599 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) 7218 list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
6600 __perf_event_remove_from_context(event); 7219 __perf_remove_from_context(event);
6601} 7220}
6602 7221
6603static void perf_event_exit_cpu_context(int cpu) 7222static void perf_event_exit_cpu_context(int cpu)
@@ -6721,3 +7340,83 @@ unlock:
6721 return ret; 7340 return ret;
6722} 7341}
6723device_initcall(perf_event_sysfs_init); 7342device_initcall(perf_event_sysfs_init);
7343
7344#ifdef CONFIG_CGROUP_PERF
7345static struct cgroup_subsys_state *perf_cgroup_create(
7346 struct cgroup_subsys *ss, struct cgroup *cont)
7347{
7348 struct perf_cgroup *jc;
7349
7350 jc = kzalloc(sizeof(*jc), GFP_KERNEL);
7351 if (!jc)
7352 return ERR_PTR(-ENOMEM);
7353
7354 jc->info = alloc_percpu(struct perf_cgroup_info);
7355 if (!jc->info) {
7356 kfree(jc);
7357 return ERR_PTR(-ENOMEM);
7358 }
7359
7360 return &jc->css;
7361}
7362
7363static void perf_cgroup_destroy(struct cgroup_subsys *ss,
7364 struct cgroup *cont)
7365{
7366 struct perf_cgroup *jc;
7367 jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
7368 struct perf_cgroup, css);
7369 free_percpu(jc->info);
7370 kfree(jc);
7371}
7372
7373static int __perf_cgroup_move(void *info)
7374{
7375 struct task_struct *task = info;
7376 perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
7377 return 0;
7378}
7379
7380static void perf_cgroup_move(struct task_struct *task)
7381{
7382 task_function_call(task, __perf_cgroup_move, task);
7383}
7384
7385static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
7386 struct cgroup *old_cgrp, struct task_struct *task,
7387 bool threadgroup)
7388{
7389 perf_cgroup_move(task);
7390 if (threadgroup) {
7391 struct task_struct *c;
7392 rcu_read_lock();
7393 list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
7394 perf_cgroup_move(c);
7395 }
7396 rcu_read_unlock();
7397 }
7398}
7399
7400static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
7401 struct cgroup *old_cgrp, struct task_struct *task)
7402{
7403 /*
7404 * cgroup_exit() is called in the copy_process() failure path.
7405 * Ignore this case since the task hasn't ran yet, this avoids
7406 * trying to poke a half freed task state from generic code.
7407 */
7408 if (!(task->flags & PF_EXITING))
7409 return;
7410
7411 perf_cgroup_move(task);
7412}
7413
7414struct cgroup_subsys perf_subsys = {
7415 .name = "perf_event",
7416 .subsys_id = perf_subsys_id,
7417 .create = perf_cgroup_create,
7418 .destroy = perf_cgroup_destroy,
7419 .exit = perf_cgroup_exit,
7420 .attach = perf_cgroup_attach,
7421};
7422#endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 05bb7173850e..67fea9d25d55 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -176,7 +176,8 @@ static inline cputime_t virt_ticks(struct task_struct *p)
176 return p->utime; 176 return p->utime;
177} 177}
178 178
179int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) 179static int
180posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
180{ 181{
181 int error = check_clock(which_clock); 182 int error = check_clock(which_clock);
182 if (!error) { 183 if (!error) {
@@ -194,7 +195,8 @@ int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
194 return error; 195 return error;
195} 196}
196 197
197int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) 198static int
199posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
198{ 200{
199 /* 201 /*
200 * You can never reset a CPU clock, but we check for other errors 202 * You can never reset a CPU clock, but we check for other errors
@@ -317,7 +319,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
317} 319}
318 320
319 321
320int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) 322static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
321{ 323{
322 const pid_t pid = CPUCLOCK_PID(which_clock); 324 const pid_t pid = CPUCLOCK_PID(which_clock);
323 int error = -EINVAL; 325 int error = -EINVAL;
@@ -379,7 +381,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
379 * This is called from sys_timer_create() and do_cpu_nanosleep() with the 381 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
380 * new timer already all-zeros initialized. 382 * new timer already all-zeros initialized.
381 */ 383 */
382int posix_cpu_timer_create(struct k_itimer *new_timer) 384static int posix_cpu_timer_create(struct k_itimer *new_timer)
383{ 385{
384 int ret = 0; 386 int ret = 0;
385 const pid_t pid = CPUCLOCK_PID(new_timer->it_clock); 387 const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
@@ -425,7 +427,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
425 * If we return TIMER_RETRY, it's necessary to release the timer's lock 427 * If we return TIMER_RETRY, it's necessary to release the timer's lock
426 * and try again. (This happens when the timer is in the middle of firing.) 428 * and try again. (This happens when the timer is in the middle of firing.)
427 */ 429 */
428int posix_cpu_timer_del(struct k_itimer *timer) 430static int posix_cpu_timer_del(struct k_itimer *timer)
429{ 431{
430 struct task_struct *p = timer->it.cpu.task; 432 struct task_struct *p = timer->it.cpu.task;
431 int ret = 0; 433 int ret = 0;
@@ -665,8 +667,8 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
665 * If we return TIMER_RETRY, it's necessary to release the timer's lock 667 * If we return TIMER_RETRY, it's necessary to release the timer's lock
666 * and try again. (This happens when the timer is in the middle of firing.) 668 * and try again. (This happens when the timer is in the middle of firing.)
667 */ 669 */
668int posix_cpu_timer_set(struct k_itimer *timer, int flags, 670static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
669 struct itimerspec *new, struct itimerspec *old) 671 struct itimerspec *new, struct itimerspec *old)
670{ 672{
671 struct task_struct *p = timer->it.cpu.task; 673 struct task_struct *p = timer->it.cpu.task;
672 union cpu_time_count old_expires, new_expires, old_incr, val; 674 union cpu_time_count old_expires, new_expires, old_incr, val;
@@ -820,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
820 return ret; 822 return ret;
821} 823}
822 824
823void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) 825static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
824{ 826{
825 union cpu_time_count now; 827 union cpu_time_count now;
826 struct task_struct *p = timer->it.cpu.task; 828 struct task_struct *p = timer->it.cpu.task;
@@ -1481,11 +1483,13 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
1481 return error; 1483 return error;
1482} 1484}
1483 1485
1484int posix_cpu_nsleep(const clockid_t which_clock, int flags, 1486static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1485 struct timespec *rqtp, struct timespec __user *rmtp) 1487
1488static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1489 struct timespec *rqtp, struct timespec __user *rmtp)
1486{ 1490{
1487 struct restart_block *restart_block = 1491 struct restart_block *restart_block =
1488 &current_thread_info()->restart_block; 1492 &current_thread_info()->restart_block;
1489 struct itimerspec it; 1493 struct itimerspec it;
1490 int error; 1494 int error;
1491 1495
@@ -1501,56 +1505,47 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1501 1505
1502 if (error == -ERESTART_RESTARTBLOCK) { 1506 if (error == -ERESTART_RESTARTBLOCK) {
1503 1507
1504 if (flags & TIMER_ABSTIME) 1508 if (flags & TIMER_ABSTIME)
1505 return -ERESTARTNOHAND; 1509 return -ERESTARTNOHAND;
1506 /* 1510 /*
1507 * Report back to the user the time still remaining. 1511 * Report back to the user the time still remaining.
1508 */ 1512 */
1509 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) 1513 if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1510 return -EFAULT; 1514 return -EFAULT;
1511 1515
1512 restart_block->fn = posix_cpu_nsleep_restart; 1516 restart_block->fn = posix_cpu_nsleep_restart;
1513 restart_block->arg0 = which_clock; 1517 restart_block->nanosleep.index = which_clock;
1514 restart_block->arg1 = (unsigned long) rmtp; 1518 restart_block->nanosleep.rmtp = rmtp;
1515 restart_block->arg2 = rqtp->tv_sec; 1519 restart_block->nanosleep.expires = timespec_to_ns(rqtp);
1516 restart_block->arg3 = rqtp->tv_nsec;
1517 } 1520 }
1518 return error; 1521 return error;
1519} 1522}
1520 1523
1521long posix_cpu_nsleep_restart(struct restart_block *restart_block) 1524static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1522{ 1525{
1523 clockid_t which_clock = restart_block->arg0; 1526 clockid_t which_clock = restart_block->nanosleep.index;
1524 struct timespec __user *rmtp;
1525 struct timespec t; 1527 struct timespec t;
1526 struct itimerspec it; 1528 struct itimerspec it;
1527 int error; 1529 int error;
1528 1530
1529 rmtp = (struct timespec __user *) restart_block->arg1; 1531 t = ns_to_timespec(restart_block->nanosleep.expires);
1530 t.tv_sec = restart_block->arg2;
1531 t.tv_nsec = restart_block->arg3;
1532 1532
1533 restart_block->fn = do_no_restart_syscall;
1534 error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it); 1533 error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
1535 1534
1536 if (error == -ERESTART_RESTARTBLOCK) { 1535 if (error == -ERESTART_RESTARTBLOCK) {
1536 struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
1537 /* 1537 /*
1538 * Report back to the user the time still remaining. 1538 * Report back to the user the time still remaining.
1539 */ 1539 */
1540 if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) 1540 if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
1541 return -EFAULT; 1541 return -EFAULT;
1542 1542
1543 restart_block->fn = posix_cpu_nsleep_restart; 1543 restart_block->nanosleep.expires = timespec_to_ns(&t);
1544 restart_block->arg0 = which_clock;
1545 restart_block->arg1 = (unsigned long) rmtp;
1546 restart_block->arg2 = t.tv_sec;
1547 restart_block->arg3 = t.tv_nsec;
1548 } 1544 }
1549 return error; 1545 return error;
1550 1546
1551} 1547}
1552 1548
1553
1554#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) 1549#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
1555#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) 1550#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
1556 1551
@@ -1594,38 +1589,37 @@ static int thread_cpu_timer_create(struct k_itimer *timer)
1594 timer->it_clock = THREAD_CLOCK; 1589 timer->it_clock = THREAD_CLOCK;
1595 return posix_cpu_timer_create(timer); 1590 return posix_cpu_timer_create(timer);
1596} 1591}
1597static int thread_cpu_nsleep(const clockid_t which_clock, int flags, 1592
1598 struct timespec *rqtp, struct timespec __user *rmtp) 1593struct k_clock clock_posix_cpu = {
1599{ 1594 .clock_getres = posix_cpu_clock_getres,
1600 return -EINVAL; 1595 .clock_set = posix_cpu_clock_set,
1601} 1596 .clock_get = posix_cpu_clock_get,
1602static long thread_cpu_nsleep_restart(struct restart_block *restart_block) 1597 .timer_create = posix_cpu_timer_create,
1603{ 1598 .nsleep = posix_cpu_nsleep,
1604 return -EINVAL; 1599 .nsleep_restart = posix_cpu_nsleep_restart,
1605} 1600 .timer_set = posix_cpu_timer_set,
1601 .timer_del = posix_cpu_timer_del,
1602 .timer_get = posix_cpu_timer_get,
1603};
1606 1604
1607static __init int init_posix_cpu_timers(void) 1605static __init int init_posix_cpu_timers(void)
1608{ 1606{
1609 struct k_clock process = { 1607 struct k_clock process = {
1610 .clock_getres = process_cpu_clock_getres, 1608 .clock_getres = process_cpu_clock_getres,
1611 .clock_get = process_cpu_clock_get, 1609 .clock_get = process_cpu_clock_get,
1612 .clock_set = do_posix_clock_nosettime, 1610 .timer_create = process_cpu_timer_create,
1613 .timer_create = process_cpu_timer_create, 1611 .nsleep = process_cpu_nsleep,
1614 .nsleep = process_cpu_nsleep, 1612 .nsleep_restart = process_cpu_nsleep_restart,
1615 .nsleep_restart = process_cpu_nsleep_restart,
1616 }; 1613 };
1617 struct k_clock thread = { 1614 struct k_clock thread = {
1618 .clock_getres = thread_cpu_clock_getres, 1615 .clock_getres = thread_cpu_clock_getres,
1619 .clock_get = thread_cpu_clock_get, 1616 .clock_get = thread_cpu_clock_get,
1620 .clock_set = do_posix_clock_nosettime, 1617 .timer_create = thread_cpu_timer_create,
1621 .timer_create = thread_cpu_timer_create,
1622 .nsleep = thread_cpu_nsleep,
1623 .nsleep_restart = thread_cpu_nsleep_restart,
1624 }; 1618 };
1625 struct timespec ts; 1619 struct timespec ts;
1626 1620
1627 register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); 1621 posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
1628 register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); 1622 posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
1629 1623
1630 cputime_to_timespec(cputime_one_jiffy, &ts); 1624 cputime_to_timespec(cputime_one_jiffy, &ts);
1631 onecputick = ts.tv_nsec; 1625 onecputick = ts.tv_nsec;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 93bd2eb2bc53..4c0124919f9a 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -41,6 +41,7 @@
41#include <linux/init.h> 41#include <linux/init.h>
42#include <linux/compiler.h> 42#include <linux/compiler.h>
43#include <linux/idr.h> 43#include <linux/idr.h>
44#include <linux/posix-clock.h>
44#include <linux/posix-timers.h> 45#include <linux/posix-timers.h>
45#include <linux/syscalls.h> 46#include <linux/syscalls.h>
46#include <linux/wait.h> 47#include <linux/wait.h>
@@ -81,6 +82,14 @@ static DEFINE_SPINLOCK(idr_lock);
81#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" 82#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
82#endif 83#endif
83 84
85/*
86 * parisc wants ENOTSUP instead of EOPNOTSUPP
87 */
88#ifndef ENOTSUP
89# define ENANOSLEEP_NOTSUP EOPNOTSUPP
90#else
91# define ENANOSLEEP_NOTSUP ENOTSUP
92#endif
84 93
85/* 94/*
86 * The timer ID is turned into a timer address by idr_find(). 95 * The timer ID is turned into a timer address by idr_find().
@@ -94,11 +103,7 @@ static DEFINE_SPINLOCK(idr_lock);
94/* 103/*
95 * CLOCKs: The POSIX standard calls for a couple of clocks and allows us 104 * CLOCKs: The POSIX standard calls for a couple of clocks and allows us
96 * to implement others. This structure defines the various 105 * to implement others. This structure defines the various
97 * clocks and allows the possibility of adding others. We 106 * clocks.
98 * provide an interface to add clocks to the table and expect
99 * the "arch" code to add at least one clock that is high
100 * resolution. Here we define the standard CLOCK_REALTIME as a
101 * 1/HZ resolution clock.
102 * 107 *
103 * RESOLUTION: Clock resolution is used to round up timer and interval 108 * RESOLUTION: Clock resolution is used to round up timer and interval
104 * times, NOT to report clock times, which are reported with as 109 * times, NOT to report clock times, which are reported with as
@@ -108,20 +113,13 @@ static DEFINE_SPINLOCK(idr_lock);
108 * necessary code is written. The standard says we should say 113 * necessary code is written. The standard says we should say
109 * something about this issue in the documentation... 114 * something about this issue in the documentation...
110 * 115 *
111 * FUNCTIONS: The CLOCKs structure defines possible functions to handle 116 * FUNCTIONS: The CLOCKs structure defines possible functions to
112 * various clock functions. For clocks that use the standard 117 * handle various clock functions.
113 * system timer code these entries should be NULL. This will
114 * allow dispatch without the overhead of indirect function
115 * calls. CLOCKS that depend on other sources (e.g. WWV or GPS)
116 * must supply functions here, even if the function just returns
117 * ENOSYS. The standard POSIX timer management code assumes the
118 * following: 1.) The k_itimer struct (sched.h) is used for the
119 * timer. 2.) The list, it_lock, it_clock, it_id and it_pid
120 * fields are not modified by timer code.
121 * 118 *
122 * At this time all functions EXCEPT clock_nanosleep can be 119 * The standard POSIX timer management code assumes the
123 * redirected by the CLOCKS structure. Clock_nanosleep is in 120 * following: 1.) The k_itimer struct (sched.h) is used for
124 * there, but the code ignores it. 121 * the timer. 2.) The list, it_lock, it_clock, it_id and
122 * it_pid fields are not modified by timer code.
125 * 123 *
126 * Permissions: It is assumed that the clock_settime() function defined 124 * Permissions: It is assumed that the clock_settime() function defined
127 * for each clock will take care of permission checks. Some 125 * for each clock will take care of permission checks. Some
@@ -138,6 +136,7 @@ static struct k_clock posix_clocks[MAX_CLOCKS];
138 */ 136 */
139static int common_nsleep(const clockid_t, int flags, struct timespec *t, 137static int common_nsleep(const clockid_t, int flags, struct timespec *t,
140 struct timespec __user *rmtp); 138 struct timespec __user *rmtp);
139static int common_timer_create(struct k_itimer *new_timer);
141static void common_timer_get(struct k_itimer *, struct itimerspec *); 140static void common_timer_get(struct k_itimer *, struct itimerspec *);
142static int common_timer_set(struct k_itimer *, int, 141static int common_timer_set(struct k_itimer *, int,
143 struct itimerspec *, struct itimerspec *); 142 struct itimerspec *, struct itimerspec *);
@@ -158,76 +157,24 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
158 spin_unlock_irqrestore(&timr->it_lock, flags); 157 spin_unlock_irqrestore(&timr->it_lock, flags);
159} 158}
160 159
161/* 160/* Get clock_realtime */
162 * Call the k_clock hook function if non-null, or the default function. 161static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
163 */
164#define CLOCK_DISPATCH(clock, call, arglist) \
165 ((clock) < 0 ? posix_cpu_##call arglist : \
166 (posix_clocks[clock].call != NULL \
167 ? (*posix_clocks[clock].call) arglist : common_##call arglist))
168
169/*
170 * Default clock hook functions when the struct k_clock passed
171 * to register_posix_clock leaves a function pointer null.
172 *
173 * The function common_CALL is the default implementation for
174 * the function pointer CALL in struct k_clock.
175 */
176
177static inline int common_clock_getres(const clockid_t which_clock,
178 struct timespec *tp)
179{
180 tp->tv_sec = 0;
181 tp->tv_nsec = posix_clocks[which_clock].res;
182 return 0;
183}
184
185/*
186 * Get real time for posix timers
187 */
188static int common_clock_get(clockid_t which_clock, struct timespec *tp)
189{ 162{
190 ktime_get_real_ts(tp); 163 ktime_get_real_ts(tp);
191 return 0; 164 return 0;
192} 165}
193 166
194static inline int common_clock_set(const clockid_t which_clock, 167/* Set clock_realtime */
195 struct timespec *tp) 168static int posix_clock_realtime_set(const clockid_t which_clock,
169 const struct timespec *tp)
196{ 170{
197 return do_sys_settimeofday(tp, NULL); 171 return do_sys_settimeofday(tp, NULL);
198} 172}
199 173
200static int common_timer_create(struct k_itimer *new_timer) 174static int posix_clock_realtime_adj(const clockid_t which_clock,
201{ 175 struct timex *t)
202 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
203 return 0;
204}
205
206static int no_timer_create(struct k_itimer *new_timer)
207{
208 return -EOPNOTSUPP;
209}
210
211static int no_nsleep(const clockid_t which_clock, int flags,
212 struct timespec *tsave, struct timespec __user *rmtp)
213{
214 return -EOPNOTSUPP;
215}
216
217/*
218 * Return nonzero if we know a priori this clockid_t value is bogus.
219 */
220static inline int invalid_clockid(const clockid_t which_clock)
221{ 176{
222 if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */ 177 return do_adjtimex(t);
223 return 0;
224 if ((unsigned) which_clock >= MAX_CLOCKS)
225 return 1;
226 if (posix_clocks[which_clock].clock_getres != NULL)
227 return 0;
228 if (posix_clocks[which_clock].res != 0)
229 return 0;
230 return 1;
231} 178}
232 179
233/* 180/*
@@ -240,7 +187,7 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
240} 187}
241 188
242/* 189/*
243 * Get monotonic time for posix timers 190 * Get monotonic-raw time for posix timers
244 */ 191 */
245static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) 192static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
246{ 193{
@@ -267,46 +214,70 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp
267 *tp = ktime_to_timespec(KTIME_LOW_RES); 214 *tp = ktime_to_timespec(KTIME_LOW_RES);
268 return 0; 215 return 0;
269} 216}
217
218static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
219{
220 get_monotonic_boottime(tp);
221 return 0;
222}
223
224
270/* 225/*
271 * Initialize everything, well, just everything in Posix clocks/timers ;) 226 * Initialize everything, well, just everything in Posix clocks/timers ;)
272 */ 227 */
273static __init int init_posix_timers(void) 228static __init int init_posix_timers(void)
274{ 229{
275 struct k_clock clock_realtime = { 230 struct k_clock clock_realtime = {
276 .clock_getres = hrtimer_get_res, 231 .clock_getres = hrtimer_get_res,
232 .clock_get = posix_clock_realtime_get,
233 .clock_set = posix_clock_realtime_set,
234 .clock_adj = posix_clock_realtime_adj,
235 .nsleep = common_nsleep,
236 .nsleep_restart = hrtimer_nanosleep_restart,
237 .timer_create = common_timer_create,
238 .timer_set = common_timer_set,
239 .timer_get = common_timer_get,
240 .timer_del = common_timer_del,
277 }; 241 };
278 struct k_clock clock_monotonic = { 242 struct k_clock clock_monotonic = {
279 .clock_getres = hrtimer_get_res, 243 .clock_getres = hrtimer_get_res,
280 .clock_get = posix_ktime_get_ts, 244 .clock_get = posix_ktime_get_ts,
281 .clock_set = do_posix_clock_nosettime, 245 .nsleep = common_nsleep,
246 .nsleep_restart = hrtimer_nanosleep_restart,
247 .timer_create = common_timer_create,
248 .timer_set = common_timer_set,
249 .timer_get = common_timer_get,
250 .timer_del = common_timer_del,
282 }; 251 };
283 struct k_clock clock_monotonic_raw = { 252 struct k_clock clock_monotonic_raw = {
284 .clock_getres = hrtimer_get_res, 253 .clock_getres = hrtimer_get_res,
285 .clock_get = posix_get_monotonic_raw, 254 .clock_get = posix_get_monotonic_raw,
286 .clock_set = do_posix_clock_nosettime,
287 .timer_create = no_timer_create,
288 .nsleep = no_nsleep,
289 }; 255 };
290 struct k_clock clock_realtime_coarse = { 256 struct k_clock clock_realtime_coarse = {
291 .clock_getres = posix_get_coarse_res, 257 .clock_getres = posix_get_coarse_res,
292 .clock_get = posix_get_realtime_coarse, 258 .clock_get = posix_get_realtime_coarse,
293 .clock_set = do_posix_clock_nosettime,
294 .timer_create = no_timer_create,
295 .nsleep = no_nsleep,
296 }; 259 };
297 struct k_clock clock_monotonic_coarse = { 260 struct k_clock clock_monotonic_coarse = {
298 .clock_getres = posix_get_coarse_res, 261 .clock_getres = posix_get_coarse_res,
299 .clock_get = posix_get_monotonic_coarse, 262 .clock_get = posix_get_monotonic_coarse,
300 .clock_set = do_posix_clock_nosettime, 263 };
301 .timer_create = no_timer_create, 264 struct k_clock clock_boottime = {
302 .nsleep = no_nsleep, 265 .clock_getres = hrtimer_get_res,
266 .clock_get = posix_get_boottime,
267 .nsleep = common_nsleep,
268 .nsleep_restart = hrtimer_nanosleep_restart,
269 .timer_create = common_timer_create,
270 .timer_set = common_timer_set,
271 .timer_get = common_timer_get,
272 .timer_del = common_timer_del,
303 }; 273 };
304 274
305 register_posix_clock(CLOCK_REALTIME, &clock_realtime); 275 posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
306 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); 276 posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
307 register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); 277 posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
308 register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); 278 posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
309 register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); 279 posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
280 posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
310 281
311 posix_timers_cache = kmem_cache_create("posix_timers_cache", 282 posix_timers_cache = kmem_cache_create("posix_timers_cache",
312 sizeof (struct k_itimer), 0, SLAB_PANIC, 283 sizeof (struct k_itimer), 0, SLAB_PANIC,
@@ -482,17 +453,29 @@ static struct pid *good_sigevent(sigevent_t * event)
482 return task_pid(rtn); 453 return task_pid(rtn);
483} 454}
484 455
485void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) 456void posix_timers_register_clock(const clockid_t clock_id,
457 struct k_clock *new_clock)
486{ 458{
487 if ((unsigned) clock_id >= MAX_CLOCKS) { 459 if ((unsigned) clock_id >= MAX_CLOCKS) {
488 printk("POSIX clock register failed for clock_id %d\n", 460 printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
461 clock_id);
462 return;
463 }
464
465 if (!new_clock->clock_get) {
466 printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
467 clock_id);
468 return;
469 }
470 if (!new_clock->clock_getres) {
471 printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
489 clock_id); 472 clock_id);
490 return; 473 return;
491 } 474 }
492 475
493 posix_clocks[clock_id] = *new_clock; 476 posix_clocks[clock_id] = *new_clock;
494} 477}
495EXPORT_SYMBOL_GPL(register_posix_clock); 478EXPORT_SYMBOL_GPL(posix_timers_register_clock);
496 479
497static struct k_itimer * alloc_posix_timer(void) 480static struct k_itimer * alloc_posix_timer(void)
498{ 481{
@@ -523,19 +506,39 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
523 kmem_cache_free(posix_timers_cache, tmr); 506 kmem_cache_free(posix_timers_cache, tmr);
524} 507}
525 508
509static struct k_clock *clockid_to_kclock(const clockid_t id)
510{
511 if (id < 0)
512 return (id & CLOCKFD_MASK) == CLOCKFD ?
513 &clock_posix_dynamic : &clock_posix_cpu;
514
515 if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres)
516 return NULL;
517 return &posix_clocks[id];
518}
519
520static int common_timer_create(struct k_itimer *new_timer)
521{
522 hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
523 return 0;
524}
525
526/* Create a POSIX.1b interval timer. */ 526/* Create a POSIX.1b interval timer. */
527 527
528SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, 528SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
529 struct sigevent __user *, timer_event_spec, 529 struct sigevent __user *, timer_event_spec,
530 timer_t __user *, created_timer_id) 530 timer_t __user *, created_timer_id)
531{ 531{
532 struct k_clock *kc = clockid_to_kclock(which_clock);
532 struct k_itimer *new_timer; 533 struct k_itimer *new_timer;
533 int error, new_timer_id; 534 int error, new_timer_id;
534 sigevent_t event; 535 sigevent_t event;
535 int it_id_set = IT_ID_NOT_SET; 536 int it_id_set = IT_ID_NOT_SET;
536 537
537 if (invalid_clockid(which_clock)) 538 if (!kc)
538 return -EINVAL; 539 return -EINVAL;
540 if (!kc->timer_create)
541 return -EOPNOTSUPP;
539 542
540 new_timer = alloc_posix_timer(); 543 new_timer = alloc_posix_timer();
541 if (unlikely(!new_timer)) 544 if (unlikely(!new_timer))
@@ -597,7 +600,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
597 goto out; 600 goto out;
598 } 601 }
599 602
600 error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); 603 error = kc->timer_create(new_timer);
601 if (error) 604 if (error)
602 goto out; 605 goto out;
603 606
@@ -607,7 +610,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
607 spin_unlock_irq(&current->sighand->siglock); 610 spin_unlock_irq(&current->sighand->siglock);
608 611
609 return 0; 612 return 0;
610 /* 613 /*
611 * In the case of the timer belonging to another task, after 614 * In the case of the timer belonging to another task, after
612 * the task is unlocked, the timer is owned by the other task 615 * the task is unlocked, the timer is owned by the other task
613 * and may cease to exist at any time. Don't use or modify 616 * and may cease to exist at any time. Don't use or modify
@@ -709,22 +712,28 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
709SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, 712SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
710 struct itimerspec __user *, setting) 713 struct itimerspec __user *, setting)
711{ 714{
712 struct k_itimer *timr;
713 struct itimerspec cur_setting; 715 struct itimerspec cur_setting;
716 struct k_itimer *timr;
717 struct k_clock *kc;
714 unsigned long flags; 718 unsigned long flags;
719 int ret = 0;
715 720
716 timr = lock_timer(timer_id, &flags); 721 timr = lock_timer(timer_id, &flags);
717 if (!timr) 722 if (!timr)
718 return -EINVAL; 723 return -EINVAL;
719 724
720 CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting)); 725 kc = clockid_to_kclock(timr->it_clock);
726 if (WARN_ON_ONCE(!kc || !kc->timer_get))
727 ret = -EINVAL;
728 else
729 kc->timer_get(timr, &cur_setting);
721 730
722 unlock_timer(timr, flags); 731 unlock_timer(timr, flags);
723 732
724 if (copy_to_user(setting, &cur_setting, sizeof (cur_setting))) 733 if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
725 return -EFAULT; 734 return -EFAULT;
726 735
727 return 0; 736 return ret;
728} 737}
729 738
730/* 739/*
@@ -813,6 +822,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
813 int error = 0; 822 int error = 0;
814 unsigned long flag; 823 unsigned long flag;
815 struct itimerspec *rtn = old_setting ? &old_spec : NULL; 824 struct itimerspec *rtn = old_setting ? &old_spec : NULL;
825 struct k_clock *kc;
816 826
817 if (!new_setting) 827 if (!new_setting)
818 return -EINVAL; 828 return -EINVAL;
@@ -828,8 +838,11 @@ retry:
828 if (!timr) 838 if (!timr)
829 return -EINVAL; 839 return -EINVAL;
830 840
831 error = CLOCK_DISPATCH(timr->it_clock, timer_set, 841 kc = clockid_to_kclock(timr->it_clock);
832 (timr, flags, &new_spec, rtn)); 842 if (WARN_ON_ONCE(!kc || !kc->timer_set))
843 error = -EINVAL;
844 else
845 error = kc->timer_set(timr, flags, &new_spec, rtn);
833 846
834 unlock_timer(timr, flag); 847 unlock_timer(timr, flag);
835 if (error == TIMER_RETRY) { 848 if (error == TIMER_RETRY) {
@@ -844,7 +857,7 @@ retry:
844 return error; 857 return error;
845} 858}
846 859
847static inline int common_timer_del(struct k_itimer *timer) 860static int common_timer_del(struct k_itimer *timer)
848{ 861{
849 timer->it.real.interval.tv64 = 0; 862 timer->it.real.interval.tv64 = 0;
850 863
@@ -855,7 +868,11 @@ static inline int common_timer_del(struct k_itimer *timer)
855 868
856static inline int timer_delete_hook(struct k_itimer *timer) 869static inline int timer_delete_hook(struct k_itimer *timer)
857{ 870{
858 return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer)); 871 struct k_clock *kc = clockid_to_kclock(timer->it_clock);
872
873 if (WARN_ON_ONCE(!kc || !kc->timer_del))
874 return -EINVAL;
875 return kc->timer_del(timer);
859} 876}
860 877
861/* Delete a POSIX.1b interval timer. */ 878/* Delete a POSIX.1b interval timer. */
@@ -927,69 +944,76 @@ void exit_itimers(struct signal_struct *sig)
927 } 944 }
928} 945}
929 946
930/* Not available / possible... functions */
931int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
932{
933 return -EINVAL;
934}
935EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
936
937int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
938 struct timespec *t, struct timespec __user *r)
939{
940#ifndef ENOTSUP
941 return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */
942#else /* parisc does define it separately. */
943 return -ENOTSUP;
944#endif
945}
946EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
947
948SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, 947SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
949 const struct timespec __user *, tp) 948 const struct timespec __user *, tp)
950{ 949{
950 struct k_clock *kc = clockid_to_kclock(which_clock);
951 struct timespec new_tp; 951 struct timespec new_tp;
952 952
953 if (invalid_clockid(which_clock)) 953 if (!kc || !kc->clock_set)
954 return -EINVAL; 954 return -EINVAL;
955
955 if (copy_from_user(&new_tp, tp, sizeof (*tp))) 956 if (copy_from_user(&new_tp, tp, sizeof (*tp)))
956 return -EFAULT; 957 return -EFAULT;
957 958
958 return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); 959 return kc->clock_set(which_clock, &new_tp);
959} 960}
960 961
961SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, 962SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
962 struct timespec __user *,tp) 963 struct timespec __user *,tp)
963{ 964{
965 struct k_clock *kc = clockid_to_kclock(which_clock);
964 struct timespec kernel_tp; 966 struct timespec kernel_tp;
965 int error; 967 int error;
966 968
967 if (invalid_clockid(which_clock)) 969 if (!kc)
968 return -EINVAL; 970 return -EINVAL;
969 error = CLOCK_DISPATCH(which_clock, clock_get, 971
970 (which_clock, &kernel_tp)); 972 error = kc->clock_get(which_clock, &kernel_tp);
973
971 if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) 974 if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
972 error = -EFAULT; 975 error = -EFAULT;
973 976
974 return error; 977 return error;
978}
979
980SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
981 struct timex __user *, utx)
982{
983 struct k_clock *kc = clockid_to_kclock(which_clock);
984 struct timex ktx;
985 int err;
986
987 if (!kc)
988 return -EINVAL;
989 if (!kc->clock_adj)
990 return -EOPNOTSUPP;
991
992 if (copy_from_user(&ktx, utx, sizeof(ktx)))
993 return -EFAULT;
994
995 err = kc->clock_adj(which_clock, &ktx);
996
997 if (!err && copy_to_user(utx, &ktx, sizeof(ktx)))
998 return -EFAULT;
975 999
1000 return err;
976} 1001}
977 1002
978SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, 1003SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
979 struct timespec __user *, tp) 1004 struct timespec __user *, tp)
980{ 1005{
1006 struct k_clock *kc = clockid_to_kclock(which_clock);
981 struct timespec rtn_tp; 1007 struct timespec rtn_tp;
982 int error; 1008 int error;
983 1009
984 if (invalid_clockid(which_clock)) 1010 if (!kc)
985 return -EINVAL; 1011 return -EINVAL;
986 1012
987 error = CLOCK_DISPATCH(which_clock, clock_getres, 1013 error = kc->clock_getres(which_clock, &rtn_tp);
988 (which_clock, &rtn_tp));
989 1014
990 if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) { 1015 if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
991 error = -EFAULT; 1016 error = -EFAULT;
992 }
993 1017
994 return error; 1018 return error;
995} 1019}
@@ -1009,10 +1033,13 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
1009 const struct timespec __user *, rqtp, 1033 const struct timespec __user *, rqtp,
1010 struct timespec __user *, rmtp) 1034 struct timespec __user *, rmtp)
1011{ 1035{
1036 struct k_clock *kc = clockid_to_kclock(which_clock);
1012 struct timespec t; 1037 struct timespec t;
1013 1038
1014 if (invalid_clockid(which_clock)) 1039 if (!kc)
1015 return -EINVAL; 1040 return -EINVAL;
1041 if (!kc->nsleep)
1042 return -ENANOSLEEP_NOTSUP;
1016 1043
1017 if (copy_from_user(&t, rqtp, sizeof (struct timespec))) 1044 if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
1018 return -EFAULT; 1045 return -EFAULT;
@@ -1020,27 +1047,20 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
1020 if (!timespec_valid(&t)) 1047 if (!timespec_valid(&t))
1021 return -EINVAL; 1048 return -EINVAL;
1022 1049
1023 return CLOCK_DISPATCH(which_clock, nsleep, 1050 return kc->nsleep(which_clock, flags, &t, rmtp);
1024 (which_clock, flags, &t, rmtp));
1025}
1026
1027/*
1028 * nanosleep_restart for monotonic and realtime clocks
1029 */
1030static int common_nsleep_restart(struct restart_block *restart_block)
1031{
1032 return hrtimer_nanosleep_restart(restart_block);
1033} 1051}
1034 1052
1035/* 1053/*
1036 * This will restart clock_nanosleep. This is required only by 1054 * This will restart clock_nanosleep. This is required only by
1037 * compat_clock_nanosleep_restart for now. 1055 * compat_clock_nanosleep_restart for now.
1038 */ 1056 */
1039long 1057long clock_nanosleep_restart(struct restart_block *restart_block)
1040clock_nanosleep_restart(struct restart_block *restart_block)
1041{ 1058{
1042 clockid_t which_clock = restart_block->arg0; 1059 clockid_t which_clock = restart_block->nanosleep.index;
1060 struct k_clock *kc = clockid_to_kclock(which_clock);
1061
1062 if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
1063 return -EINVAL;
1043 1064
1044 return CLOCK_DISPATCH(which_clock, nsleep_restart, 1065 return kc->nsleep_restart(restart_block);
1045 (restart_block));
1046} 1066}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1708b1e2972d..e2302e40b360 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -163,7 +163,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
163 return !err; 163 return !err;
164} 164}
165 165
166int ptrace_attach(struct task_struct *task) 166static int ptrace_attach(struct task_struct *task)
167{ 167{
168 int retval; 168 int retval;
169 169
@@ -219,7 +219,7 @@ out:
219 * Performs checks and sets PT_PTRACED. 219 * Performs checks and sets PT_PTRACED.
220 * Should be used by all ptrace implementations for PTRACE_TRACEME. 220 * Should be used by all ptrace implementations for PTRACE_TRACEME.
221 */ 221 */
222int ptrace_traceme(void) 222static int ptrace_traceme(void)
223{ 223{
224 int ret = -EPERM; 224 int ret = -EPERM;
225 225
@@ -293,7 +293,7 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
293 return false; 293 return false;
294} 294}
295 295
296int ptrace_detach(struct task_struct *child, unsigned int data) 296static int ptrace_detach(struct task_struct *child, unsigned int data)
297{ 297{
298 bool dead = false; 298 bool dead = false;
299 299
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index ddabb54bb5c8..3c7cbc2c33be 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -215,7 +215,6 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
215 put_pid(waiter->deadlock_task_pid); 215 put_pid(waiter->deadlock_task_pid);
216 TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); 216 TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
217 TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); 217 TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
218 TRACE_WARN_ON(waiter->task);
219 memset(waiter, 0x22, sizeof(*waiter)); 218 memset(waiter, 0x22, sizeof(*waiter));
220} 219}
221 220
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 66cb89bc5ef1..5c9ccd380966 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -9,7 +9,6 @@
9#include <linux/kthread.h> 9#include <linux/kthread.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/smp_lock.h>
13#include <linux/spinlock.h> 12#include <linux/spinlock.h>
14#include <linux/sysdev.h> 13#include <linux/sysdev.h>
15#include <linux/timer.h> 14#include <linux/timer.h>
@@ -27,7 +26,6 @@ struct test_thread_data {
27 int opcode; 26 int opcode;
28 int opdata; 27 int opdata;
29 int mutexes[MAX_RT_TEST_MUTEXES]; 28 int mutexes[MAX_RT_TEST_MUTEXES];
30 int bkl;
31 int event; 29 int event;
32 struct sys_device sysdev; 30 struct sys_device sysdev;
33}; 31};
@@ -46,9 +44,8 @@ enum test_opcodes {
46 RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ 44 RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
47 RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ 45 RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
48 RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ 46 RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
49 RTTEST_LOCKBKL, /* 9 Lock BKL */ 47 /* 9, 10 - reserved for BKL commemoration */
50 RTTEST_UNLOCKBKL, /* 10 Unlock BKL */ 48 RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */
51 RTTEST_SIGNAL, /* 11 Signal other test thread, data = thread id */
52 RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ 49 RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
53 RTTEST_RESET = 99, /* 99 Reset all pending operations */ 50 RTTEST_RESET = 99, /* 99 Reset all pending operations */
54}; 51};
@@ -74,13 +71,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
74 td->mutexes[i] = 0; 71 td->mutexes[i] = 0;
75 } 72 }
76 } 73 }
77
78 if (!lockwakeup && td->bkl == 4) {
79#ifdef CONFIG_LOCK_KERNEL
80 unlock_kernel();
81#endif
82 td->bkl = 0;
83 }
84 return 0; 74 return 0;
85 75
86 case RTTEST_RESETEVENT: 76 case RTTEST_RESETEVENT:
@@ -131,25 +121,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
131 td->mutexes[id] = 0; 121 td->mutexes[id] = 0;
132 return 0; 122 return 0;
133 123
134 case RTTEST_LOCKBKL:
135 if (td->bkl)
136 return 0;
137 td->bkl = 1;
138#ifdef CONFIG_LOCK_KERNEL
139 lock_kernel();
140#endif
141 td->bkl = 4;
142 return 0;
143
144 case RTTEST_UNLOCKBKL:
145 if (td->bkl != 4)
146 break;
147#ifdef CONFIG_LOCK_KERNEL
148 unlock_kernel();
149#endif
150 td->bkl = 0;
151 return 0;
152
153 default: 124 default:
154 break; 125 break;
155 } 126 }
@@ -196,7 +167,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
196 td->event = atomic_add_return(1, &rttest_event); 167 td->event = atomic_add_return(1, &rttest_event);
197 break; 168 break;
198 169
199 case RTTEST_LOCKBKL:
200 default: 170 default:
201 break; 171 break;
202 } 172 }
@@ -229,8 +199,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
229 td->event = atomic_add_return(1, &rttest_event); 199 td->event = atomic_add_return(1, &rttest_event);
230 return; 200 return;
231 201
232 case RTTEST_LOCKBKL:
233 return;
234 default: 202 default:
235 return; 203 return;
236 } 204 }
@@ -380,11 +348,11 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute
380 spin_lock(&rttest_lock); 348 spin_lock(&rttest_lock);
381 349
382 curr += sprintf(curr, 350 curr += sprintf(curr,
383 "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:", 351 "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
384 td->opcode, td->event, tsk->state, 352 td->opcode, td->event, tsk->state,
385 (MAX_RT_PRIO - 1) - tsk->prio, 353 (MAX_RT_PRIO - 1) - tsk->prio,
386 (MAX_RT_PRIO - 1) - tsk->normal_prio, 354 (MAX_RT_PRIO - 1) - tsk->normal_prio,
387 tsk->pi_blocked_on, td->bkl); 355 tsk->pi_blocked_on);
388 356
389 for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--) 357 for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
390 curr += sprintf(curr, "%d", td->mutexes[i]); 358 curr += sprintf(curr, "%d", td->mutexes[i]);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a9604815786a..ab449117aaf2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -20,41 +20,34 @@
20/* 20/*
21 * lock->owner state tracking: 21 * lock->owner state tracking:
22 * 22 *
23 * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1 23 * lock->owner holds the task_struct pointer of the owner. Bit 0
24 * are used to keep track of the "owner is pending" and "lock has 24 * is used to keep track of the "lock has waiters" state.
25 * waiters" state.
26 * 25 *
27 * owner bit1 bit0 26 * owner bit0
28 * NULL 0 0 lock is free (fast acquire possible) 27 * NULL 0 lock is free (fast acquire possible)
29 * NULL 0 1 invalid state 28 * NULL 1 lock is free and has waiters and the top waiter
30 * NULL 1 0 Transitional State* 29 * is going to take the lock*
31 * NULL 1 1 invalid state 30 * taskpointer 0 lock is held (fast release possible)
32 * taskpointer 0 0 lock is held (fast release possible) 31 * taskpointer 1 lock is held and has waiters**
33 * taskpointer 0 1 task is pending owner
34 * taskpointer 1 0 lock is held and has waiters
35 * taskpointer 1 1 task is pending owner and lock has more waiters
36 *
37 * Pending ownership is assigned to the top (highest priority)
38 * waiter of the lock, when the lock is released. The thread is woken
39 * up and can now take the lock. Until the lock is taken (bit 0
40 * cleared) a competing higher priority thread can steal the lock
41 * which puts the woken up thread back on the waiters list.
42 * 32 *
43 * The fast atomic compare exchange based acquire and release is only 33 * The fast atomic compare exchange based acquire and release is only
44 * possible when bit 0 and 1 of lock->owner are 0. 34 * possible when bit 0 of lock->owner is 0.
35 *
36 * (*) It also can be a transitional state when grabbing the lock
37 * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
38 * we need to set the bit0 before looking at the lock, and the owner may be
39 * NULL in this small time, hence this can be a transitional state.
45 * 40 *
46 * (*) There's a small time where the owner can be NULL and the 41 * (**) There is a small time when bit 0 is set but there are no
47 * "lock has waiters" bit is set. This can happen when grabbing the lock. 42 * waiters. This can happen when grabbing the lock in the slow path.
48 * To prevent a cmpxchg of the owner releasing the lock, we need to set this 43 * To prevent a cmpxchg of the owner releasing the lock, we need to
49 * bit before looking at the lock, hence the reason this is a transitional 44 * set this bit before looking at the lock.
50 * state.
51 */ 45 */
52 46
53static void 47static void
54rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, 48rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
55 unsigned long mask)
56{ 49{
57 unsigned long val = (unsigned long)owner | mask; 50 unsigned long val = (unsigned long)owner;
58 51
59 if (rt_mutex_has_waiters(lock)) 52 if (rt_mutex_has_waiters(lock))
60 val |= RT_MUTEX_HAS_WAITERS; 53 val |= RT_MUTEX_HAS_WAITERS;
@@ -203,15 +196,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
203 * reached or the state of the chain has changed while we 196 * reached or the state of the chain has changed while we
204 * dropped the locks. 197 * dropped the locks.
205 */ 198 */
206 if (!waiter || !waiter->task) 199 if (!waiter)
207 goto out_unlock_pi; 200 goto out_unlock_pi;
208 201
209 /* 202 /*
210 * Check the orig_waiter state. After we dropped the locks, 203 * Check the orig_waiter state. After we dropped the locks,
211 * the previous owner of the lock might have released the lock 204 * the previous owner of the lock might have released the lock.
212 * and made us the pending owner:
213 */ 205 */
214 if (orig_waiter && !orig_waiter->task) 206 if (orig_waiter && !rt_mutex_owner(orig_lock))
215 goto out_unlock_pi; 207 goto out_unlock_pi;
216 208
217 /* 209 /*
@@ -254,6 +246,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
254 246
255 /* Release the task */ 247 /* Release the task */
256 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 248 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
249 if (!rt_mutex_owner(lock)) {
250 /*
251 * If the requeue above changed the top waiter, then we need
252 * to wake the new top waiter up to try to get the lock.
253 */
254
255 if (top_waiter != rt_mutex_top_waiter(lock))
256 wake_up_process(rt_mutex_top_waiter(lock)->task);
257 raw_spin_unlock(&lock->wait_lock);
258 goto out_put_task;
259 }
257 put_task_struct(task); 260 put_task_struct(task);
258 261
259 /* Grab the next task */ 262 /* Grab the next task */
@@ -296,78 +299,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
296} 299}
297 300
298/* 301/*
299 * Optimization: check if we can steal the lock from the
300 * assigned pending owner [which might not have taken the
301 * lock yet]:
302 */
303static inline int try_to_steal_lock(struct rt_mutex *lock,
304 struct task_struct *task)
305{
306 struct task_struct *pendowner = rt_mutex_owner(lock);
307 struct rt_mutex_waiter *next;
308 unsigned long flags;
309
310 if (!rt_mutex_owner_pending(lock))
311 return 0;
312
313 if (pendowner == task)
314 return 1;
315
316 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
317 if (task->prio >= pendowner->prio) {
318 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
319 return 0;
320 }
321
322 /*
323 * Check if a waiter is enqueued on the pending owners
324 * pi_waiters list. Remove it and readjust pending owners
325 * priority.
326 */
327 if (likely(!rt_mutex_has_waiters(lock))) {
328 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
329 return 1;
330 }
331
332 /* No chain handling, pending owner is not blocked on anything: */
333 next = rt_mutex_top_waiter(lock);
334 plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
335 __rt_mutex_adjust_prio(pendowner);
336 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
337
338 /*
339 * We are going to steal the lock and a waiter was
340 * enqueued on the pending owners pi_waiters queue. So
341 * we have to enqueue this waiter into
342 * task->pi_waiters list. This covers the case,
343 * where task is boosted because it holds another
344 * lock and gets unboosted because the booster is
345 * interrupted, so we would delay a waiter with higher
346 * priority as task->normal_prio.
347 *
348 * Note: in the rare case of a SCHED_OTHER task changing
349 * its priority and thus stealing the lock, next->task
350 * might be task:
351 */
352 if (likely(next->task != task)) {
353 raw_spin_lock_irqsave(&task->pi_lock, flags);
354 plist_add(&next->pi_list_entry, &task->pi_waiters);
355 __rt_mutex_adjust_prio(task);
356 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
357 }
358 return 1;
359}
360
361/*
362 * Try to take an rt-mutex 302 * Try to take an rt-mutex
363 * 303 *
364 * This fails
365 * - when the lock has a real owner
366 * - when a different pending owner exists and has higher priority than current
367 *
368 * Must be called with lock->wait_lock held. 304 * Must be called with lock->wait_lock held.
305 *
306 * @lock: the lock to be acquired.
307 * @task: the task which wants to acquire the lock
308 * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
369 */ 309 */
370static int try_to_take_rt_mutex(struct rt_mutex *lock) 310static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
311 struct rt_mutex_waiter *waiter)
371{ 312{
372 /* 313 /*
373 * We have to be careful here if the atomic speedups are 314 * We have to be careful here if the atomic speedups are
@@ -390,15 +331,52 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
390 */ 331 */
391 mark_rt_mutex_waiters(lock); 332 mark_rt_mutex_waiters(lock);
392 333
393 if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) 334 if (rt_mutex_owner(lock))
394 return 0; 335 return 0;
395 336
337 /*
338 * It will get the lock because of one of these conditions:
339 * 1) there is no waiter
340 * 2) higher priority than waiters
341 * 3) it is top waiter
342 */
343 if (rt_mutex_has_waiters(lock)) {
344 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
345 if (!waiter || waiter != rt_mutex_top_waiter(lock))
346 return 0;
347 }
348 }
349
350 if (waiter || rt_mutex_has_waiters(lock)) {
351 unsigned long flags;
352 struct rt_mutex_waiter *top;
353
354 raw_spin_lock_irqsave(&task->pi_lock, flags);
355
356 /* remove the queued waiter. */
357 if (waiter) {
358 plist_del(&waiter->list_entry, &lock->wait_list);
359 task->pi_blocked_on = NULL;
360 }
361
362 /*
363 * We have to enqueue the top waiter(if it exists) into
364 * task->pi_waiters list.
365 */
366 if (rt_mutex_has_waiters(lock)) {
367 top = rt_mutex_top_waiter(lock);
368 top->pi_list_entry.prio = top->list_entry.prio;
369 plist_add(&top->pi_list_entry, &task->pi_waiters);
370 }
371 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
372 }
373
396 /* We got the lock. */ 374 /* We got the lock. */
397 debug_rt_mutex_lock(lock); 375 debug_rt_mutex_lock(lock);
398 376
399 rt_mutex_set_owner(lock, current, 0); 377 rt_mutex_set_owner(lock, task);
400 378
401 rt_mutex_deadlock_account_lock(lock, current); 379 rt_mutex_deadlock_account_lock(lock, task);
402 380
403 return 1; 381 return 1;
404} 382}
@@ -436,6 +414,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
436 414
437 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 415 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
438 416
417 if (!owner)
418 return 0;
419
439 if (waiter == rt_mutex_top_waiter(lock)) { 420 if (waiter == rt_mutex_top_waiter(lock)) {
440 raw_spin_lock_irqsave(&owner->pi_lock, flags); 421 raw_spin_lock_irqsave(&owner->pi_lock, flags);
441 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); 422 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -472,21 +453,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
472/* 453/*
473 * Wake up the next waiter on the lock. 454 * Wake up the next waiter on the lock.
474 * 455 *
475 * Remove the top waiter from the current tasks waiter list and from 456 * Remove the top waiter from the current tasks waiter list and wake it up.
476 * the lock waiter list. Set it as pending owner. Then wake it up.
477 * 457 *
478 * Called with lock->wait_lock held. 458 * Called with lock->wait_lock held.
479 */ 459 */
480static void wakeup_next_waiter(struct rt_mutex *lock) 460static void wakeup_next_waiter(struct rt_mutex *lock)
481{ 461{
482 struct rt_mutex_waiter *waiter; 462 struct rt_mutex_waiter *waiter;
483 struct task_struct *pendowner;
484 unsigned long flags; 463 unsigned long flags;
485 464
486 raw_spin_lock_irqsave(&current->pi_lock, flags); 465 raw_spin_lock_irqsave(&current->pi_lock, flags);
487 466
488 waiter = rt_mutex_top_waiter(lock); 467 waiter = rt_mutex_top_waiter(lock);
489 plist_del(&waiter->list_entry, &lock->wait_list);
490 468
491 /* 469 /*
492 * Remove it from current->pi_waiters. We do not adjust a 470 * Remove it from current->pi_waiters. We do not adjust a
@@ -495,43 +473,19 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
495 * lock->wait_lock. 473 * lock->wait_lock.
496 */ 474 */
497 plist_del(&waiter->pi_list_entry, &current->pi_waiters); 475 plist_del(&waiter->pi_list_entry, &current->pi_waiters);
498 pendowner = waiter->task;
499 waiter->task = NULL;
500 476
501 rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); 477 rt_mutex_set_owner(lock, NULL);
502 478
503 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 479 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
504 480
505 /* 481 wake_up_process(waiter->task);
506 * Clear the pi_blocked_on variable and enqueue a possible
507 * waiter into the pi_waiters list of the pending owner. This
508 * prevents that in case the pending owner gets unboosted a
509 * waiter with higher priority than pending-owner->normal_prio
510 * is blocked on the unboosted (pending) owner.
511 */
512 raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
513
514 WARN_ON(!pendowner->pi_blocked_on);
515 WARN_ON(pendowner->pi_blocked_on != waiter);
516 WARN_ON(pendowner->pi_blocked_on->lock != lock);
517
518 pendowner->pi_blocked_on = NULL;
519
520 if (rt_mutex_has_waiters(lock)) {
521 struct rt_mutex_waiter *next;
522
523 next = rt_mutex_top_waiter(lock);
524 plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
525 }
526 raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
527
528 wake_up_process(pendowner);
529} 482}
530 483
531/* 484/*
532 * Remove a waiter from a lock 485 * Remove a waiter from a lock and give up
533 * 486 *
534 * Must be called with lock->wait_lock held 487 * Must be called with lock->wait_lock held and
488 * have just failed to try_to_take_rt_mutex().
535 */ 489 */
536static void remove_waiter(struct rt_mutex *lock, 490static void remove_waiter(struct rt_mutex *lock,
537 struct rt_mutex_waiter *waiter) 491 struct rt_mutex_waiter *waiter)
@@ -543,11 +497,13 @@ static void remove_waiter(struct rt_mutex *lock,
543 497
544 raw_spin_lock_irqsave(&current->pi_lock, flags); 498 raw_spin_lock_irqsave(&current->pi_lock, flags);
545 plist_del(&waiter->list_entry, &lock->wait_list); 499 plist_del(&waiter->list_entry, &lock->wait_list);
546 waiter->task = NULL;
547 current->pi_blocked_on = NULL; 500 current->pi_blocked_on = NULL;
548 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 501 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
549 502
550 if (first && owner != current) { 503 if (!owner)
504 return;
505
506 if (first) {
551 507
552 raw_spin_lock_irqsave(&owner->pi_lock, flags); 508 raw_spin_lock_irqsave(&owner->pi_lock, flags);
553 509
@@ -614,21 +570,19 @@ void rt_mutex_adjust_pi(struct task_struct *task)
614 * or TASK_UNINTERRUPTIBLE) 570 * or TASK_UNINTERRUPTIBLE)
615 * @timeout: the pre-initialized and started timer, or NULL for none 571 * @timeout: the pre-initialized and started timer, or NULL for none
616 * @waiter: the pre-initialized rt_mutex_waiter 572 * @waiter: the pre-initialized rt_mutex_waiter
617 * @detect_deadlock: passed to task_blocks_on_rt_mutex
618 * 573 *
619 * lock->wait_lock must be held by the caller. 574 * lock->wait_lock must be held by the caller.
620 */ 575 */
621static int __sched 576static int __sched
622__rt_mutex_slowlock(struct rt_mutex *lock, int state, 577__rt_mutex_slowlock(struct rt_mutex *lock, int state,
623 struct hrtimer_sleeper *timeout, 578 struct hrtimer_sleeper *timeout,
624 struct rt_mutex_waiter *waiter, 579 struct rt_mutex_waiter *waiter)
625 int detect_deadlock)
626{ 580{
627 int ret = 0; 581 int ret = 0;
628 582
629 for (;;) { 583 for (;;) {
630 /* Try to acquire the lock: */ 584 /* Try to acquire the lock: */
631 if (try_to_take_rt_mutex(lock)) 585 if (try_to_take_rt_mutex(lock, current, waiter))
632 break; 586 break;
633 587
634 /* 588 /*
@@ -645,39 +599,11 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
645 break; 599 break;
646 } 600 }
647 601
648 /*
649 * waiter->task is NULL the first time we come here and
650 * when we have been woken up by the previous owner
651 * but the lock got stolen by a higher prio task.
652 */
653 if (!waiter->task) {
654 ret = task_blocks_on_rt_mutex(lock, waiter, current,
655 detect_deadlock);
656 /*
657 * If we got woken up by the owner then start loop
658 * all over without going into schedule to try
659 * to get the lock now:
660 */
661 if (unlikely(!waiter->task)) {
662 /*
663 * Reset the return value. We might
664 * have returned with -EDEADLK and the
665 * owner released the lock while we
666 * were walking the pi chain.
667 */
668 ret = 0;
669 continue;
670 }
671 if (unlikely(ret))
672 break;
673 }
674
675 raw_spin_unlock(&lock->wait_lock); 602 raw_spin_unlock(&lock->wait_lock);
676 603
677 debug_rt_mutex_print_deadlock(waiter); 604 debug_rt_mutex_print_deadlock(waiter);
678 605
679 if (waiter->task) 606 schedule_rt_mutex(lock);
680 schedule_rt_mutex(lock);
681 607
682 raw_spin_lock(&lock->wait_lock); 608 raw_spin_lock(&lock->wait_lock);
683 set_current_state(state); 609 set_current_state(state);
@@ -698,12 +624,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
698 int ret = 0; 624 int ret = 0;
699 625
700 debug_rt_mutex_init_waiter(&waiter); 626 debug_rt_mutex_init_waiter(&waiter);
701 waiter.task = NULL;
702 627
703 raw_spin_lock(&lock->wait_lock); 628 raw_spin_lock(&lock->wait_lock);
704 629
705 /* Try to acquire the lock again: */ 630 /* Try to acquire the lock again: */
706 if (try_to_take_rt_mutex(lock)) { 631 if (try_to_take_rt_mutex(lock, current, NULL)) {
707 raw_spin_unlock(&lock->wait_lock); 632 raw_spin_unlock(&lock->wait_lock);
708 return 0; 633 return 0;
709 } 634 }
@@ -717,12 +642,14 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
717 timeout->task = NULL; 642 timeout->task = NULL;
718 } 643 }
719 644
720 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, 645 ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
721 detect_deadlock); 646
647 if (likely(!ret))
648 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
722 649
723 set_current_state(TASK_RUNNING); 650 set_current_state(TASK_RUNNING);
724 651
725 if (unlikely(waiter.task)) 652 if (unlikely(ret))
726 remove_waiter(lock, &waiter); 653 remove_waiter(lock, &waiter);
727 654
728 /* 655 /*
@@ -737,14 +664,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
737 if (unlikely(timeout)) 664 if (unlikely(timeout))
738 hrtimer_cancel(&timeout->timer); 665 hrtimer_cancel(&timeout->timer);
739 666
740 /*
741 * Readjust priority, when we did not get the lock. We might
742 * have been the pending owner and boosted. Since we did not
743 * take the lock, the PI boost has to go.
744 */
745 if (unlikely(ret))
746 rt_mutex_adjust_prio(current);
747
748 debug_rt_mutex_free_waiter(&waiter); 667 debug_rt_mutex_free_waiter(&waiter);
749 668
750 return ret; 669 return ret;
@@ -762,7 +681,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
762 681
763 if (likely(rt_mutex_owner(lock) != current)) { 682 if (likely(rt_mutex_owner(lock) != current)) {
764 683
765 ret = try_to_take_rt_mutex(lock); 684 ret = try_to_take_rt_mutex(lock, current, NULL);
766 /* 685 /*
767 * try_to_take_rt_mutex() sets the lock waiters 686 * try_to_take_rt_mutex() sets the lock waiters
768 * bit unconditionally. Clean this up. 687 * bit unconditionally. Clean this up.
@@ -992,7 +911,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
992{ 911{
993 __rt_mutex_init(lock, NULL); 912 __rt_mutex_init(lock, NULL);
994 debug_rt_mutex_proxy_lock(lock, proxy_owner); 913 debug_rt_mutex_proxy_lock(lock, proxy_owner);
995 rt_mutex_set_owner(lock, proxy_owner, 0); 914 rt_mutex_set_owner(lock, proxy_owner);
996 rt_mutex_deadlock_account_lock(lock, proxy_owner); 915 rt_mutex_deadlock_account_lock(lock, proxy_owner);
997} 916}
998 917
@@ -1008,7 +927,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1008 struct task_struct *proxy_owner) 927 struct task_struct *proxy_owner)
1009{ 928{
1010 debug_rt_mutex_proxy_unlock(lock); 929 debug_rt_mutex_proxy_unlock(lock);
1011 rt_mutex_set_owner(lock, NULL, 0); 930 rt_mutex_set_owner(lock, NULL);
1012 rt_mutex_deadlock_account_unlock(proxy_owner); 931 rt_mutex_deadlock_account_unlock(proxy_owner);
1013} 932}
1014 933
@@ -1034,20 +953,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1034 953
1035 raw_spin_lock(&lock->wait_lock); 954 raw_spin_lock(&lock->wait_lock);
1036 955
1037 mark_rt_mutex_waiters(lock); 956 if (try_to_take_rt_mutex(lock, task, NULL)) {
1038
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock);
1042 rt_mutex_set_owner(lock, task, 0);
1043 raw_spin_unlock(&lock->wait_lock); 957 raw_spin_unlock(&lock->wait_lock);
1044 rt_mutex_deadlock_account_lock(lock, task);
1045 return 1; 958 return 1;
1046 } 959 }
1047 960
1048 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 961 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1049 962
1050 if (ret && !waiter->task) { 963 if (ret && !rt_mutex_owner(lock)) {
1051 /* 964 /*
1052 * Reset the return value. We might have 965 * Reset the return value. We might have
1053 * returned with -EDEADLK and the owner 966 * returned with -EDEADLK and the owner
@@ -1056,6 +969,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1056 */ 969 */
1057 ret = 0; 970 ret = 0;
1058 } 971 }
972
973 if (unlikely(ret))
974 remove_waiter(lock, waiter);
975
1059 raw_spin_unlock(&lock->wait_lock); 976 raw_spin_unlock(&lock->wait_lock);
1060 977
1061 debug_rt_mutex_print_deadlock(waiter); 978 debug_rt_mutex_print_deadlock(waiter);
@@ -1110,12 +1027,11 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1110 1027
1111 set_current_state(TASK_INTERRUPTIBLE); 1028 set_current_state(TASK_INTERRUPTIBLE);
1112 1029
1113 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, 1030 ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1114 detect_deadlock);
1115 1031
1116 set_current_state(TASK_RUNNING); 1032 set_current_state(TASK_RUNNING);
1117 1033
1118 if (unlikely(waiter->task)) 1034 if (unlikely(ret))
1119 remove_waiter(lock, waiter); 1035 remove_waiter(lock, waiter);
1120 1036
1121 /* 1037 /*
@@ -1126,13 +1042,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1126 1042
1127 raw_spin_unlock(&lock->wait_lock); 1043 raw_spin_unlock(&lock->wait_lock);
1128 1044
1129 /*
1130 * Readjust priority, when we did not get the lock. We might have been
1131 * the pending owner and boosted. Since we did not take the lock, the
1132 * PI boost has to go.
1133 */
1134 if (unlikely(ret))
1135 rt_mutex_adjust_prio(current);
1136
1137 return ret; 1045 return ret;
1138} 1046}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 97a2f81866af..53a66c85261b 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -91,9 +91,8 @@ task_top_pi_waiter(struct task_struct *p)
91/* 91/*
92 * lock->owner state tracking: 92 * lock->owner state tracking:
93 */ 93 */
94#define RT_MUTEX_OWNER_PENDING 1UL 94#define RT_MUTEX_HAS_WAITERS 1UL
95#define RT_MUTEX_HAS_WAITERS 2UL 95#define RT_MUTEX_OWNER_MASKALL 1UL
96#define RT_MUTEX_OWNER_MASKALL 3UL
97 96
98static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) 97static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
99{ 98{
@@ -101,17 +100,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
101 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL); 100 ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
102} 101}
103 102
104static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
105{
106 return (struct task_struct *)
107 ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
108}
109
110static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
111{
112 return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
113}
114
115/* 103/*
116 * PI-futex support (proxy locking functions, etc.): 104 * PI-futex support (proxy locking functions, etc.):
117 */ 105 */
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7ba..c8e40b7005c0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq {
324 * 'curr' points to currently running entity on this cfs_rq. 324 * 'curr' points to currently running entity on this cfs_rq.
325 * It is set to NULL otherwise (i.e when none are currently running). 325 * It is set to NULL otherwise (i.e when none are currently running).
326 */ 326 */
327 struct sched_entity *curr, *next, *last; 327 struct sched_entity *curr, *next, *last, *skip;
328 328
329 unsigned int nr_spread_over; 329 unsigned int nr_spread_over;
330 330
@@ -606,9 +606,6 @@ static inline struct task_group *task_group(struct task_struct *p)
606 struct task_group *tg; 606 struct task_group *tg;
607 struct cgroup_subsys_state *css; 607 struct cgroup_subsys_state *css;
608 608
609 if (p->flags & PF_EXITING)
610 return &root_task_group;
611
612 css = task_subsys_state_check(p, cpu_cgroup_subsys_id, 609 css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
613 lockdep_is_held(&task_rq(p)->lock)); 610 lockdep_is_held(&task_rq(p)->lock));
614 tg = container_of(css, struct task_group, css); 611 tg = container_of(css, struct task_group, css);
@@ -1686,6 +1683,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1686 __release(rq2->lock); 1683 __release(rq2->lock);
1687} 1684}
1688 1685
1686#else /* CONFIG_SMP */
1687
1688/*
1689 * double_rq_lock - safely lock two runqueues
1690 *
1691 * Note this does not disable interrupts like task_rq_lock,
1692 * you need to do so manually before calling.
1693 */
1694static void double_rq_lock(struct rq *rq1, struct rq *rq2)
1695 __acquires(rq1->lock)
1696 __acquires(rq2->lock)
1697{
1698 BUG_ON(!irqs_disabled());
1699 BUG_ON(rq1 != rq2);
1700 raw_spin_lock(&rq1->lock);
1701 __acquire(rq2->lock); /* Fake it out ;) */
1702}
1703
1704/*
1705 * double_rq_unlock - safely unlock two runqueues
1706 *
1707 * Note this does not restore interrupts like task_rq_unlock,
1708 * you need to do so manually after calling.
1709 */
1710static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
1711 __releases(rq1->lock)
1712 __releases(rq2->lock)
1713{
1714 BUG_ON(rq1 != rq2);
1715 raw_spin_unlock(&rq1->lock);
1716 __release(rq2->lock);
1717}
1718
1689#endif 1719#endif
1690 1720
1691static void calc_load_account_idle(struct rq *this_rq); 1721static void calc_load_account_idle(struct rq *this_rq);
@@ -1880,7 +1910,7 @@ void account_system_vtime(struct task_struct *curr)
1880 */ 1910 */
1881 if (hardirq_count()) 1911 if (hardirq_count())
1882 __this_cpu_add(cpu_hardirq_time, delta); 1912 __this_cpu_add(cpu_hardirq_time, delta);
1883 else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) 1913 else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
1884 __this_cpu_add(cpu_softirq_time, delta); 1914 __this_cpu_add(cpu_softirq_time, delta);
1885 1915
1886 irq_time_write_end(); 1916 irq_time_write_end();
@@ -1920,8 +1950,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
1920 sched_rt_avg_update(rq, irq_delta); 1950 sched_rt_avg_update(rq, irq_delta);
1921} 1951}
1922 1952
1953static int irqtime_account_hi_update(void)
1954{
1955 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
1956 unsigned long flags;
1957 u64 latest_ns;
1958 int ret = 0;
1959
1960 local_irq_save(flags);
1961 latest_ns = this_cpu_read(cpu_hardirq_time);
1962 if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
1963 ret = 1;
1964 local_irq_restore(flags);
1965 return ret;
1966}
1967
1968static int irqtime_account_si_update(void)
1969{
1970 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
1971 unsigned long flags;
1972 u64 latest_ns;
1973 int ret = 0;
1974
1975 local_irq_save(flags);
1976 latest_ns = this_cpu_read(cpu_softirq_time);
1977 if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
1978 ret = 1;
1979 local_irq_restore(flags);
1980 return ret;
1981}
1982
1923#else /* CONFIG_IRQ_TIME_ACCOUNTING */ 1983#else /* CONFIG_IRQ_TIME_ACCOUNTING */
1924 1984
1985#define sched_clock_irqtime (0)
1986
1925static void update_rq_clock_task(struct rq *rq, s64 delta) 1987static void update_rq_clock_task(struct rq *rq, s64 delta)
1926{ 1988{
1927 rq->clock_task += delta; 1989 rq->clock_task += delta;
@@ -2025,14 +2087,14 @@ inline int task_curr(const struct task_struct *p)
2025 2087
2026static inline void check_class_changed(struct rq *rq, struct task_struct *p, 2088static inline void check_class_changed(struct rq *rq, struct task_struct *p,
2027 const struct sched_class *prev_class, 2089 const struct sched_class *prev_class,
2028 int oldprio, int running) 2090 int oldprio)
2029{ 2091{
2030 if (prev_class != p->sched_class) { 2092 if (prev_class != p->sched_class) {
2031 if (prev_class->switched_from) 2093 if (prev_class->switched_from)
2032 prev_class->switched_from(rq, p, running); 2094 prev_class->switched_from(rq, p);
2033 p->sched_class->switched_to(rq, p, running); 2095 p->sched_class->switched_to(rq, p);
2034 } else 2096 } else if (oldprio != p->prio)
2035 p->sched_class->prio_changed(rq, p, oldprio, running); 2097 p->sched_class->prio_changed(rq, p, oldprio);
2036} 2098}
2037 2099
2038static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) 2100static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
@@ -2224,7 +2286,10 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
2224 * yield - it could be a while. 2286 * yield - it could be a while.
2225 */ 2287 */
2226 if (unlikely(on_rq)) { 2288 if (unlikely(on_rq)) {
2227 schedule_timeout_uninterruptible(1); 2289 ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
2290
2291 set_current_state(TASK_UNINTERRUPTIBLE);
2292 schedule_hrtimeout(&to, HRTIMER_MODE_REL);
2228 continue; 2293 continue;
2229 } 2294 }
2230 2295
@@ -2265,27 +2330,6 @@ void kick_process(struct task_struct *p)
2265EXPORT_SYMBOL_GPL(kick_process); 2330EXPORT_SYMBOL_GPL(kick_process);
2266#endif /* CONFIG_SMP */ 2331#endif /* CONFIG_SMP */
2267 2332
2268/**
2269 * task_oncpu_function_call - call a function on the cpu on which a task runs
2270 * @p: the task to evaluate
2271 * @func: the function to be called
2272 * @info: the function call argument
2273 *
2274 * Calls the function @func when the task is currently running. This might
2275 * be on the current CPU, which just calls the function directly
2276 */
2277void task_oncpu_function_call(struct task_struct *p,
2278 void (*func) (void *info), void *info)
2279{
2280 int cpu;
2281
2282 preempt_disable();
2283 cpu = task_cpu(p);
2284 if (task_curr(p))
2285 smp_call_function_single(cpu, func, info, 1);
2286 preempt_enable();
2287}
2288
2289#ifdef CONFIG_SMP 2333#ifdef CONFIG_SMP
2290/* 2334/*
2291 * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. 2335 * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@ -2566,6 +2610,7 @@ static void __sched_fork(struct task_struct *p)
2566 p->se.sum_exec_runtime = 0; 2610 p->se.sum_exec_runtime = 0;
2567 p->se.prev_sum_exec_runtime = 0; 2611 p->se.prev_sum_exec_runtime = 0;
2568 p->se.nr_migrations = 0; 2612 p->se.nr_migrations = 0;
2613 p->se.vruntime = 0;
2569 2614
2570#ifdef CONFIG_SCHEDSTATS 2615#ifdef CONFIG_SCHEDSTATS
2571 memset(&p->se.statistics, 0, sizeof(p->se.statistics)); 2616 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -2776,9 +2821,12 @@ static inline void
2776prepare_task_switch(struct rq *rq, struct task_struct *prev, 2821prepare_task_switch(struct rq *rq, struct task_struct *prev,
2777 struct task_struct *next) 2822 struct task_struct *next)
2778{ 2823{
2824 sched_info_switch(prev, next);
2825 perf_event_task_sched_out(prev, next);
2779 fire_sched_out_preempt_notifiers(prev, next); 2826 fire_sched_out_preempt_notifiers(prev, next);
2780 prepare_lock_switch(rq, next); 2827 prepare_lock_switch(rq, next);
2781 prepare_arch_switch(next); 2828 prepare_arch_switch(next);
2829 trace_sched_switch(prev, next);
2782} 2830}
2783 2831
2784/** 2832/**
@@ -2911,7 +2959,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
2911 struct mm_struct *mm, *oldmm; 2959 struct mm_struct *mm, *oldmm;
2912 2960
2913 prepare_task_switch(rq, prev, next); 2961 prepare_task_switch(rq, prev, next);
2914 trace_sched_switch(prev, next); 2962
2915 mm = next->mm; 2963 mm = next->mm;
2916 oldmm = prev->active_mm; 2964 oldmm = prev->active_mm;
2917 /* 2965 /*
@@ -3568,6 +3616,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
3568} 3616}
3569 3617
3570/* 3618/*
3619 * Account system cpu time to a process and desired cpustat field
3620 * @p: the process that the cpu time gets accounted to
3621 * @cputime: the cpu time spent in kernel space since the last update
3622 * @cputime_scaled: cputime scaled by cpu frequency
3623 * @target_cputime64: pointer to cpustat field that has to be updated
3624 */
3625static inline
3626void __account_system_time(struct task_struct *p, cputime_t cputime,
3627 cputime_t cputime_scaled, cputime64_t *target_cputime64)
3628{
3629 cputime64_t tmp = cputime_to_cputime64(cputime);
3630
3631 /* Add system time to process. */
3632 p->stime = cputime_add(p->stime, cputime);
3633 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
3634 account_group_system_time(p, cputime);
3635
3636 /* Add system time to cpustat. */
3637 *target_cputime64 = cputime64_add(*target_cputime64, tmp);
3638 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
3639
3640 /* Account for system time used */
3641 acct_update_integrals(p);
3642}
3643
3644/*
3571 * Account system cpu time to a process. 3645 * Account system cpu time to a process.
3572 * @p: the process that the cpu time gets accounted to 3646 * @p: the process that the cpu time gets accounted to
3573 * @hardirq_offset: the offset to subtract from hardirq_count() 3647 * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -3578,36 +3652,26 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
3578 cputime_t cputime, cputime_t cputime_scaled) 3652 cputime_t cputime, cputime_t cputime_scaled)
3579{ 3653{
3580 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 3654 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3581 cputime64_t tmp; 3655 cputime64_t *target_cputime64;
3582 3656
3583 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { 3657 if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
3584 account_guest_time(p, cputime, cputime_scaled); 3658 account_guest_time(p, cputime, cputime_scaled);
3585 return; 3659 return;
3586 } 3660 }
3587 3661
3588 /* Add system time to process. */
3589 p->stime = cputime_add(p->stime, cputime);
3590 p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
3591 account_group_system_time(p, cputime);
3592
3593 /* Add system time to cpustat. */
3594 tmp = cputime_to_cputime64(cputime);
3595 if (hardirq_count() - hardirq_offset) 3662 if (hardirq_count() - hardirq_offset)
3596 cpustat->irq = cputime64_add(cpustat->irq, tmp); 3663 target_cputime64 = &cpustat->irq;
3597 else if (in_serving_softirq()) 3664 else if (in_serving_softirq())
3598 cpustat->softirq = cputime64_add(cpustat->softirq, tmp); 3665 target_cputime64 = &cpustat->softirq;
3599 else 3666 else
3600 cpustat->system = cputime64_add(cpustat->system, tmp); 3667 target_cputime64 = &cpustat->system;
3601 3668
3602 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); 3669 __account_system_time(p, cputime, cputime_scaled, target_cputime64);
3603
3604 /* Account for system time used */
3605 acct_update_integrals(p);
3606} 3670}
3607 3671
3608/* 3672/*
3609 * Account for involuntary wait time. 3673 * Account for involuntary wait time.
3610 * @steal: the cpu time spent in involuntary wait 3674 * @cputime: the cpu time spent in involuntary wait
3611 */ 3675 */
3612void account_steal_time(cputime_t cputime) 3676void account_steal_time(cputime_t cputime)
3613{ 3677{
@@ -3635,6 +3699,73 @@ void account_idle_time(cputime_t cputime)
3635 3699
3636#ifndef CONFIG_VIRT_CPU_ACCOUNTING 3700#ifndef CONFIG_VIRT_CPU_ACCOUNTING
3637 3701
3702#ifdef CONFIG_IRQ_TIME_ACCOUNTING
3703/*
3704 * Account a tick to a process and cpustat
3705 * @p: the process that the cpu time gets accounted to
3706 * @user_tick: is the tick from userspace
3707 * @rq: the pointer to rq
3708 *
3709 * Tick demultiplexing follows the order
3710 * - pending hardirq update
3711 * - pending softirq update
3712 * - user_time
3713 * - idle_time
3714 * - system time
3715 * - check for guest_time
3716 * - else account as system_time
3717 *
3718 * Check for hardirq is done both for system and user time as there is
3719 * no timer going off while we are on hardirq and hence we may never get an
3720 * opportunity to update it solely in system time.
3721 * p->stime and friends are only updated on system time and not on irq
3722 * softirq as those do not count in task exec_runtime any more.
3723 */
3724static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
3725 struct rq *rq)
3726{
3727 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
3728 cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
3729 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3730
3731 if (irqtime_account_hi_update()) {
3732 cpustat->irq = cputime64_add(cpustat->irq, tmp);
3733 } else if (irqtime_account_si_update()) {
3734 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
3735 } else if (this_cpu_ksoftirqd() == p) {
3736 /*
3737 * ksoftirqd time do not get accounted in cpu_softirq_time.
3738 * So, we have to handle it separately here.
3739 * Also, p->stime needs to be updated for ksoftirqd.
3740 */
3741 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
3742 &cpustat->softirq);
3743 } else if (user_tick) {
3744 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
3745 } else if (p == rq->idle) {
3746 account_idle_time(cputime_one_jiffy);
3747 } else if (p->flags & PF_VCPU) { /* System time or guest time */
3748 account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
3749 } else {
3750 __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
3751 &cpustat->system);
3752 }
3753}
3754
3755static void irqtime_account_idle_ticks(int ticks)
3756{
3757 int i;
3758 struct rq *rq = this_rq();
3759
3760 for (i = 0; i < ticks; i++)
3761 irqtime_account_process_tick(current, 0, rq);
3762}
3763#else /* CONFIG_IRQ_TIME_ACCOUNTING */
3764static void irqtime_account_idle_ticks(int ticks) {}
3765static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
3766 struct rq *rq) {}
3767#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
3768
3638/* 3769/*
3639 * Account a single tick of cpu time. 3770 * Account a single tick of cpu time.
3640 * @p: the process that the cpu time gets accounted to 3771 * @p: the process that the cpu time gets accounted to
@@ -3645,6 +3776,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
3645 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 3776 cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
3646 struct rq *rq = this_rq(); 3777 struct rq *rq = this_rq();
3647 3778
3779 if (sched_clock_irqtime) {
3780 irqtime_account_process_tick(p, user_tick, rq);
3781 return;
3782 }
3783
3648 if (user_tick) 3784 if (user_tick)
3649 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); 3785 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
3650 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) 3786 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -3670,6 +3806,12 @@ void account_steal_ticks(unsigned long ticks)
3670 */ 3806 */
3671void account_idle_ticks(unsigned long ticks) 3807void account_idle_ticks(unsigned long ticks)
3672{ 3808{
3809
3810 if (sched_clock_irqtime) {
3811 irqtime_account_idle_ticks(ticks);
3812 return;
3813 }
3814
3673 account_idle_time(jiffies_to_cputime(ticks)); 3815 account_idle_time(jiffies_to_cputime(ticks));
3674} 3816}
3675 3817
@@ -3989,9 +4131,6 @@ need_resched_nonpreemptible:
3989 rq->skip_clock_update = 0; 4131 rq->skip_clock_update = 0;
3990 4132
3991 if (likely(prev != next)) { 4133 if (likely(prev != next)) {
3992 sched_info_switch(prev, next);
3993 perf_event_task_sched_out(prev, next);
3994
3995 rq->nr_switches++; 4134 rq->nr_switches++;
3996 rq->curr = next; 4135 rq->curr = next;
3997 ++*switch_count; 4136 ++*switch_count;
@@ -4213,6 +4352,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
4213{ 4352{
4214 __wake_up_common(q, mode, 1, 0, key); 4353 __wake_up_common(q, mode, 1, 0, key);
4215} 4354}
4355EXPORT_SYMBOL_GPL(__wake_up_locked_key);
4216 4356
4217/** 4357/**
4218 * __wake_up_sync_key - wake up threads blocked on a waitqueue. 4358 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
@@ -4570,11 +4710,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
4570 4710
4571 if (running) 4711 if (running)
4572 p->sched_class->set_curr_task(rq); 4712 p->sched_class->set_curr_task(rq);
4573 if (on_rq) { 4713 if (on_rq)
4574 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); 4714 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
4575 4715
4576 check_class_changed(rq, p, prev_class, oldprio, running); 4716 check_class_changed(rq, p, prev_class, oldprio);
4577 }
4578 task_rq_unlock(rq, &flags); 4717 task_rq_unlock(rq, &flags);
4579} 4718}
4580 4719
@@ -4822,12 +4961,15 @@ recheck:
4822 param->sched_priority > rlim_rtprio) 4961 param->sched_priority > rlim_rtprio)
4823 return -EPERM; 4962 return -EPERM;
4824 } 4963 }
4964
4825 /* 4965 /*
4826 * Like positive nice levels, dont allow tasks to 4966 * Treat SCHED_IDLE as nice 20. Only allow a switch to
4827 * move out of SCHED_IDLE either: 4967 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
4828 */ 4968 */
4829 if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) 4969 if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
4830 return -EPERM; 4970 if (!can_nice(p, TASK_NICE(p)))
4971 return -EPERM;
4972 }
4831 4973
4832 /* can't change other user's priorities */ 4974 /* can't change other user's priorities */
4833 if (!check_same_owner(p)) 4975 if (!check_same_owner(p))
@@ -4902,11 +5044,10 @@ recheck:
4902 5044
4903 if (running) 5045 if (running)
4904 p->sched_class->set_curr_task(rq); 5046 p->sched_class->set_curr_task(rq);
4905 if (on_rq) { 5047 if (on_rq)
4906 activate_task(rq, p, 0); 5048 activate_task(rq, p, 0);
4907 5049
4908 check_class_changed(rq, p, prev_class, oldprio, running); 5050 check_class_changed(rq, p, prev_class, oldprio);
4909 }
4910 __task_rq_unlock(rq); 5051 __task_rq_unlock(rq);
4911 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 5052 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4912 5053
@@ -5323,6 +5464,65 @@ void __sched yield(void)
5323} 5464}
5324EXPORT_SYMBOL(yield); 5465EXPORT_SYMBOL(yield);
5325 5466
5467/**
5468 * yield_to - yield the current processor to another thread in
5469 * your thread group, or accelerate that thread toward the
5470 * processor it's on.
5471 *
5472 * It's the caller's job to ensure that the target task struct
5473 * can't go away on us before we can do any checks.
5474 *
5475 * Returns true if we indeed boosted the target task.
5476 */
5477bool __sched yield_to(struct task_struct *p, bool preempt)
5478{
5479 struct task_struct *curr = current;
5480 struct rq *rq, *p_rq;
5481 unsigned long flags;
5482 bool yielded = 0;
5483
5484 local_irq_save(flags);
5485 rq = this_rq();
5486
5487again:
5488 p_rq = task_rq(p);
5489 double_rq_lock(rq, p_rq);
5490 while (task_rq(p) != p_rq) {
5491 double_rq_unlock(rq, p_rq);
5492 goto again;
5493 }
5494
5495 if (!curr->sched_class->yield_to_task)
5496 goto out;
5497
5498 if (curr->sched_class != p->sched_class)
5499 goto out;
5500
5501 if (task_running(p_rq, p) || p->state)
5502 goto out;
5503
5504 yielded = curr->sched_class->yield_to_task(rq, p, preempt);
5505 if (yielded) {
5506 schedstat_inc(rq, yld_count);
5507 /*
5508 * Make p's CPU reschedule; pick_next_entity takes care of
5509 * fairness.
5510 */
5511 if (preempt && rq != p_rq)
5512 resched_task(p_rq->curr);
5513 }
5514
5515out:
5516 double_rq_unlock(rq, p_rq);
5517 local_irq_restore(flags);
5518
5519 if (yielded)
5520 schedule();
5521
5522 return yielded;
5523}
5524EXPORT_SYMBOL_GPL(yield_to);
5525
5326/* 5526/*
5327 * This task is about to go to sleep on IO. Increment rq->nr_iowait so 5527 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
5328 * that process accounting knows that this is a task in IO wait state. 5528 * that process accounting knows that this is a task in IO wait state.
@@ -5571,7 +5771,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5571 * The idle tasks have their own, simple scheduling class: 5771 * The idle tasks have their own, simple scheduling class:
5572 */ 5772 */
5573 idle->sched_class = &idle_sched_class; 5773 idle->sched_class = &idle_sched_class;
5574 ftrace_graph_init_task(idle); 5774 ftrace_graph_init_idle_task(idle, cpu);
5575} 5775}
5576 5776
5577/* 5777/*
@@ -7796,6 +7996,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
7796 INIT_LIST_HEAD(&cfs_rq->tasks); 7996 INIT_LIST_HEAD(&cfs_rq->tasks);
7797#ifdef CONFIG_FAIR_GROUP_SCHED 7997#ifdef CONFIG_FAIR_GROUP_SCHED
7798 cfs_rq->rq = rq; 7998 cfs_rq->rq = rq;
7999 /* allow initial update_cfs_load() to truncate */
8000#ifdef CONFIG_SMP
8001 cfs_rq->load_stamp = 1;
8002#endif
7799#endif 8003#endif
7800 cfs_rq->min_vruntime = (u64)(-(1LL << 20)); 8004 cfs_rq->min_vruntime = (u64)(-(1LL << 20));
7801} 8005}
@@ -8109,6 +8313,8 @@ EXPORT_SYMBOL(__might_sleep);
8109#ifdef CONFIG_MAGIC_SYSRQ 8313#ifdef CONFIG_MAGIC_SYSRQ
8110static void normalize_task(struct rq *rq, struct task_struct *p) 8314static void normalize_task(struct rq *rq, struct task_struct *p)
8111{ 8315{
8316 const struct sched_class *prev_class = p->sched_class;
8317 int old_prio = p->prio;
8112 int on_rq; 8318 int on_rq;
8113 8319
8114 on_rq = p->se.on_rq; 8320 on_rq = p->se.on_rq;
@@ -8119,6 +8325,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
8119 activate_task(rq, p, 0); 8325 activate_task(rq, p, 0);
8120 resched_task(rq->curr); 8326 resched_task(rq->curr);
8121 } 8327 }
8328
8329 check_class_changed(rq, p, prev_class, old_prio);
8122} 8330}
8123 8331
8124void normalize_rt_tasks(void) 8332void normalize_rt_tasks(void)
@@ -8510,7 +8718,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
8510 /* Propagate contribution to hierarchy */ 8718 /* Propagate contribution to hierarchy */
8511 raw_spin_lock_irqsave(&rq->lock, flags); 8719 raw_spin_lock_irqsave(&rq->lock, flags);
8512 for_each_sched_entity(se) 8720 for_each_sched_entity(se)
8513 update_cfs_shares(group_cfs_rq(se), 0); 8721 update_cfs_shares(group_cfs_rq(se));
8514 raw_spin_unlock_irqrestore(&rq->lock, flags); 8722 raw_spin_unlock_irqrestore(&rq->lock, flags);
8515 } 8723 }
8516 8724
@@ -8884,7 +9092,8 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
8884} 9092}
8885 9093
8886static void 9094static void
8887cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task) 9095cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
9096 struct cgroup *old_cgrp, struct task_struct *task)
8888{ 9097{
8889 /* 9098 /*
8890 * cgroup_exit() is called in the copy_process() failure path. 9099 * cgroup_exit() is called in the copy_process() failure path.
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 9fb656283157..5946ac515602 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -12,7 +12,6 @@ static atomic_t autogroup_seq_nr;
12static void __init autogroup_init(struct task_struct *init_task) 12static void __init autogroup_init(struct task_struct *init_task)
13{ 13{
14 autogroup_default.tg = &root_task_group; 14 autogroup_default.tg = &root_task_group;
15 root_task_group.autogroup = &autogroup_default;
16 kref_init(&autogroup_default.kref); 15 kref_init(&autogroup_default.kref);
17 init_rwsem(&autogroup_default.lock); 16 init_rwsem(&autogroup_default.lock);
18 init_task->signal->autogroup = &autogroup_default; 17 init_task->signal->autogroup = &autogroup_default;
@@ -130,7 +129,7 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
130 129
131static inline bool task_group_is_autogroup(struct task_group *tg) 130static inline bool task_group_is_autogroup(struct task_group *tg)
132{ 131{
133 return tg != &root_task_group && tg->autogroup; 132 return !!tg->autogroup;
134} 133}
135 134
136static inline struct task_group * 135static inline struct task_group *
@@ -161,11 +160,15 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
161 160
162 p->signal->autogroup = autogroup_kref_get(ag); 161 p->signal->autogroup = autogroup_kref_get(ag);
163 162
163 if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
164 goto out;
165
164 t = p; 166 t = p;
165 do { 167 do {
166 sched_move_task(t); 168 sched_move_task(t);
167 } while_each_thread(p, t); 169 } while_each_thread(p, t);
168 170
171out:
169 unlock_task_sighand(p, &flags); 172 unlock_task_sighand(p, &flags);
170 autogroup_kref_put(prev); 173 autogroup_kref_put(prev);
171} 174}
@@ -247,10 +250,14 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
247{ 250{
248 struct autogroup *ag = autogroup_task_get(p); 251 struct autogroup *ag = autogroup_task_get(p);
249 252
253 if (!task_group_is_autogroup(ag->tg))
254 goto out;
255
250 down_read(&ag->lock); 256 down_read(&ag->lock);
251 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice); 257 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
252 up_read(&ag->lock); 258 up_read(&ag->lock);
253 259
260out:
254 autogroup_kref_put(ag); 261 autogroup_kref_put(ag);
255} 262}
256#endif /* CONFIG_PROC_FS */ 263#endif /* CONFIG_PROC_FS */
@@ -258,9 +265,7 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
258#ifdef CONFIG_SCHED_DEBUG 265#ifdef CONFIG_SCHED_DEBUG
259static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) 266static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
260{ 267{
261 int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); 268 if (!task_group_is_autogroup(tg))
262
263 if (!enabled || !tg->autogroup)
264 return 0; 269 return 0;
265 270
266 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); 271 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 7b859ffe5dad..05577055cfca 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -1,6 +1,11 @@
1#ifdef CONFIG_SCHED_AUTOGROUP 1#ifdef CONFIG_SCHED_AUTOGROUP
2 2
3struct autogroup { 3struct autogroup {
4 /*
5 * reference doesn't mean how many thread attach to this
6 * autogroup now. It just stands for the number of task
7 * could use this autogroup.
8 */
4 struct kref kref; 9 struct kref kref;
5 struct task_group *tg; 10 struct task_group *tg;
6 struct rw_semaphore lock; 11 struct rw_semaphore lock;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index eb6cb8edd075..7bacd83a4158 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -179,7 +179,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
179 179
180 raw_spin_lock_irqsave(&rq->lock, flags); 180 raw_spin_lock_irqsave(&rq->lock, flags);
181 if (cfs_rq->rb_leftmost) 181 if (cfs_rq->rb_leftmost)
182 MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; 182 MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
183 last = __pick_last_entity(cfs_rq); 183 last = __pick_last_entity(cfs_rq);
184 if (last) 184 if (last)
185 max_vruntime = last->vruntime; 185 max_vruntime = last->vruntime;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0c26e2df450e..3f7ec9e27ee1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8;
69unsigned int sysctl_sched_child_runs_first __read_mostly; 69unsigned int sysctl_sched_child_runs_first __read_mostly;
70 70
71/* 71/*
72 * sys_sched_yield() compat mode
73 *
74 * This option switches the agressive yield implementation of the
75 * old scheduler back on.
76 */
77unsigned int __read_mostly sysctl_sched_compat_yield;
78
79/*
80 * SCHED_OTHER wake-up granularity. 72 * SCHED_OTHER wake-up granularity.
81 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) 73 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
82 * 74 *
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
419 rb_erase(&se->run_node, &cfs_rq->tasks_timeline); 411 rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
420} 412}
421 413
422static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) 414static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
423{ 415{
424 struct rb_node *left = cfs_rq->rb_leftmost; 416 struct rb_node *left = cfs_rq->rb_leftmost;
425 417
@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
429 return rb_entry(left, struct sched_entity, run_node); 421 return rb_entry(left, struct sched_entity, run_node);
430} 422}
431 423
424static struct sched_entity *__pick_next_entity(struct sched_entity *se)
425{
426 struct rb_node *next = rb_next(&se->run_node);
427
428 if (!next)
429 return NULL;
430
431 return rb_entry(next, struct sched_entity, run_node);
432}
433
434#ifdef CONFIG_SCHED_DEBUG
432static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) 435static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
433{ 436{
434 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); 437 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
443 * Scheduling class statistics methods: 446 * Scheduling class statistics methods:
444 */ 447 */
445 448
446#ifdef CONFIG_SCHED_DEBUG
447int sched_proc_update_handler(struct ctl_table *table, int write, 449int sched_proc_update_handler(struct ctl_table *table, int write,
448 void __user *buffer, size_t *lenp, 450 void __user *buffer, size_t *lenp,
449 loff_t *ppos) 451 loff_t *ppos)
@@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
540} 542}
541 543
542static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); 544static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
543static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); 545static void update_cfs_shares(struct cfs_rq *cfs_rq);
544 546
545/* 547/*
546 * Update the current task's runtime statistics. Skip current tasks that 548 * Update the current task's runtime statistics. Skip current tasks that
@@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
733 now - cfs_rq->load_last > 4 * period) { 735 now - cfs_rq->load_last > 4 * period) {
734 cfs_rq->load_period = 0; 736 cfs_rq->load_period = 0;
735 cfs_rq->load_avg = 0; 737 cfs_rq->load_avg = 0;
738 delta = period - 1;
736 } 739 }
737 740
738 cfs_rq->load_stamp = now; 741 cfs_rq->load_stamp = now;
@@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
763 list_del_leaf_cfs_rq(cfs_rq); 766 list_del_leaf_cfs_rq(cfs_rq);
764} 767}
765 768
766static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, 769static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
767 long weight_delta)
768{ 770{
769 long load_weight, load, shares; 771 long load_weight, load, shares;
770 772
771 load = cfs_rq->load.weight + weight_delta; 773 load = cfs_rq->load.weight;
772 774
773 load_weight = atomic_read(&tg->load_weight); 775 load_weight = atomic_read(&tg->load_weight);
774 load_weight -= cfs_rq->load_contribution;
775 load_weight += load; 776 load_weight += load;
777 load_weight -= cfs_rq->load_contribution;
776 778
777 shares = (tg->shares * load); 779 shares = (tg->shares * load);
778 if (load_weight) 780 if (load_weight)
@@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
790{ 792{
791 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { 793 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
792 update_cfs_load(cfs_rq, 0); 794 update_cfs_load(cfs_rq, 0);
793 update_cfs_shares(cfs_rq, 0); 795 update_cfs_shares(cfs_rq);
794 } 796 }
795} 797}
796# else /* CONFIG_SMP */ 798# else /* CONFIG_SMP */
@@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
798{ 800{
799} 801}
800 802
801static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, 803static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
802 long weight_delta)
803{ 804{
804 return tg->shares; 805 return tg->shares;
805} 806}
@@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
824 account_entity_enqueue(cfs_rq, se); 825 account_entity_enqueue(cfs_rq, se);
825} 826}
826 827
827static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) 828static void update_cfs_shares(struct cfs_rq *cfs_rq)
828{ 829{
829 struct task_group *tg; 830 struct task_group *tg;
830 struct sched_entity *se; 831 struct sched_entity *se;
@@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
838 if (likely(se->load.weight == tg->shares)) 839 if (likely(se->load.weight == tg->shares))
839 return; 840 return;
840#endif 841#endif
841 shares = calc_cfs_shares(cfs_rq, tg, weight_delta); 842 shares = calc_cfs_shares(cfs_rq, tg);
842 843
843 reweight_entity(cfs_rq_of(se), se, shares); 844 reweight_entity(cfs_rq_of(se), se, shares);
844} 845}
@@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
847{ 848{
848} 849}
849 850
850static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) 851static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
851{ 852{
852} 853}
853 854
@@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
978 */ 979 */
979 update_curr(cfs_rq); 980 update_curr(cfs_rq);
980 update_cfs_load(cfs_rq, 0); 981 update_cfs_load(cfs_rq, 0);
981 update_cfs_shares(cfs_rq, se->load.weight);
982 account_entity_enqueue(cfs_rq, se); 982 account_entity_enqueue(cfs_rq, se);
983 update_cfs_shares(cfs_rq);
983 984
984 if (flags & ENQUEUE_WAKEUP) { 985 if (flags & ENQUEUE_WAKEUP) {
985 place_entity(cfs_rq, se, 0); 986 place_entity(cfs_rq, se, 0);
@@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
996 list_add_leaf_cfs_rq(cfs_rq); 997 list_add_leaf_cfs_rq(cfs_rq);
997} 998}
998 999
999static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 1000static void __clear_buddies_last(struct sched_entity *se)
1001{
1002 for_each_sched_entity(se) {
1003 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1004 if (cfs_rq->last == se)
1005 cfs_rq->last = NULL;
1006 else
1007 break;
1008 }
1009}
1010
1011static void __clear_buddies_next(struct sched_entity *se)
1000{ 1012{
1001 if (!se || cfs_rq->last == se) 1013 for_each_sched_entity(se) {
1002 cfs_rq->last = NULL; 1014 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1015 if (cfs_rq->next == se)
1016 cfs_rq->next = NULL;
1017 else
1018 break;
1019 }
1020}
1003 1021
1004 if (!se || cfs_rq->next == se) 1022static void __clear_buddies_skip(struct sched_entity *se)
1005 cfs_rq->next = NULL; 1023{
1024 for_each_sched_entity(se) {
1025 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1026 if (cfs_rq->skip == se)
1027 cfs_rq->skip = NULL;
1028 else
1029 break;
1030 }
1006} 1031}
1007 1032
1008static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 1033static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
1009{ 1034{
1010 for_each_sched_entity(se) 1035 if (cfs_rq->last == se)
1011 __clear_buddies(cfs_rq_of(se), se); 1036 __clear_buddies_last(se);
1037
1038 if (cfs_rq->next == se)
1039 __clear_buddies_next(se);
1040
1041 if (cfs_rq->skip == se)
1042 __clear_buddies_skip(se);
1012} 1043}
1013 1044
1014static void 1045static void
@@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1041 update_cfs_load(cfs_rq, 0); 1072 update_cfs_load(cfs_rq, 0);
1042 account_entity_dequeue(cfs_rq, se); 1073 account_entity_dequeue(cfs_rq, se);
1043 update_min_vruntime(cfs_rq); 1074 update_min_vruntime(cfs_rq);
1044 update_cfs_shares(cfs_rq, 0); 1075 update_cfs_shares(cfs_rq);
1045 1076
1046 /* 1077 /*
1047 * Normalize the entity after updating the min_vruntime because the 1078 * Normalize the entity after updating the min_vruntime because the
@@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
1084 return; 1115 return;
1085 1116
1086 if (cfs_rq->nr_running > 1) { 1117 if (cfs_rq->nr_running > 1) {
1087 struct sched_entity *se = __pick_next_entity(cfs_rq); 1118 struct sched_entity *se = __pick_first_entity(cfs_rq);
1088 s64 delta = curr->vruntime - se->vruntime; 1119 s64 delta = curr->vruntime - se->vruntime;
1089 1120
1090 if (delta < 0) 1121 if (delta < 0)
@@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
1128static int 1159static int
1129wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); 1160wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
1130 1161
1162/*
1163 * Pick the next process, keeping these things in mind, in this order:
1164 * 1) keep things fair between processes/task groups
1165 * 2) pick the "next" process, since someone really wants that to run
1166 * 3) pick the "last" process, for cache locality
1167 * 4) do not run the "skip" process, if something else is available
1168 */
1131static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) 1169static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
1132{ 1170{
1133 struct sched_entity *se = __pick_next_entity(cfs_rq); 1171 struct sched_entity *se = __pick_first_entity(cfs_rq);
1134 struct sched_entity *left = se; 1172 struct sched_entity *left = se;
1135 1173
1136 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) 1174 /*
1137 se = cfs_rq->next; 1175 * Avoid running the skip buddy, if running something else can
1176 * be done without getting too unfair.
1177 */
1178 if (cfs_rq->skip == se) {
1179 struct sched_entity *second = __pick_next_entity(se);
1180 if (second && wakeup_preempt_entity(second, left) < 1)
1181 se = second;
1182 }
1138 1183
1139 /* 1184 /*
1140 * Prefer last buddy, try to return the CPU to a preempted task. 1185 * Prefer last buddy, try to return the CPU to a preempted task.
@@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
1142 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) 1187 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
1143 se = cfs_rq->last; 1188 se = cfs_rq->last;
1144 1189
1190 /*
1191 * Someone really wants this to run. If it's not unfair, run it.
1192 */
1193 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
1194 se = cfs_rq->next;
1195
1145 clear_buddies(cfs_rq, se); 1196 clear_buddies(cfs_rq, se);
1146 1197
1147 return se; 1198 return se;
@@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1282 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1333 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1283 1334
1284 update_cfs_load(cfs_rq, 0); 1335 update_cfs_load(cfs_rq, 0);
1285 update_cfs_shares(cfs_rq, 0); 1336 update_cfs_shares(cfs_rq);
1286 } 1337 }
1287 1338
1288 hrtick_update(rq); 1339 hrtick_update(rq);
@@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1312 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1363 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1313 1364
1314 update_cfs_load(cfs_rq, 0); 1365 update_cfs_load(cfs_rq, 0);
1315 update_cfs_shares(cfs_rq, 0); 1366 update_cfs_shares(cfs_rq);
1316 } 1367 }
1317 1368
1318 hrtick_update(rq); 1369 hrtick_update(rq);
1319} 1370}
1320 1371
1321/*
1322 * sched_yield() support is very simple - we dequeue and enqueue.
1323 *
1324 * If compat_yield is turned on then we requeue to the end of the tree.
1325 */
1326static void yield_task_fair(struct rq *rq)
1327{
1328 struct task_struct *curr = rq->curr;
1329 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1330 struct sched_entity *rightmost, *se = &curr->se;
1331
1332 /*
1333 * Are we the only task in the tree?
1334 */
1335 if (unlikely(cfs_rq->nr_running == 1))
1336 return;
1337
1338 clear_buddies(cfs_rq, se);
1339
1340 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
1341 update_rq_clock(rq);
1342 /*
1343 * Update run-time statistics of the 'current'.
1344 */
1345 update_curr(cfs_rq);
1346
1347 return;
1348 }
1349 /*
1350 * Find the rightmost entry in the rbtree:
1351 */
1352 rightmost = __pick_last_entity(cfs_rq);
1353 /*
1354 * Already in the rightmost position?
1355 */
1356 if (unlikely(!rightmost || entity_before(rightmost, se)))
1357 return;
1358
1359 /*
1360 * Minimally necessary key value to be last in the tree:
1361 * Upon rescheduling, sched_class::put_prev_task() will place
1362 * 'current' within the tree based on its new key value.
1363 */
1364 se->vruntime = rightmost->vruntime + 1;
1365}
1366
1367#ifdef CONFIG_SMP 1372#ifdef CONFIG_SMP
1368 1373
1369static void task_waking_fair(struct rq *rq, struct task_struct *p) 1374static void task_waking_fair(struct rq *rq, struct task_struct *p)
@@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se)
1834 } 1839 }
1835} 1840}
1836 1841
1842static void set_skip_buddy(struct sched_entity *se)
1843{
1844 if (likely(task_of(se)->policy != SCHED_IDLE)) {
1845 for_each_sched_entity(se)
1846 cfs_rq_of(se)->skip = se;
1847 }
1848}
1849
1837/* 1850/*
1838 * Preempt the current task with a newly woken task if needed: 1851 * Preempt the current task with a newly woken task if needed:
1839 */ 1852 */
@@ -1857,16 +1870,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
1857 if (test_tsk_need_resched(curr)) 1870 if (test_tsk_need_resched(curr))
1858 return; 1871 return;
1859 1872
1873 /* Idle tasks are by definition preempted by non-idle tasks. */
1874 if (unlikely(curr->policy == SCHED_IDLE) &&
1875 likely(p->policy != SCHED_IDLE))
1876 goto preempt;
1877
1860 /* 1878 /*
1861 * Batch and idle tasks do not preempt (their preemption is driven by 1879 * Batch and idle tasks do not preempt non-idle tasks (their preemption
1862 * the tick): 1880 * is driven by the tick):
1863 */ 1881 */
1864 if (unlikely(p->policy != SCHED_NORMAL)) 1882 if (unlikely(p->policy != SCHED_NORMAL))
1865 return; 1883 return;
1866 1884
1867 /* Idle tasks are by definition preempted by everybody. */
1868 if (unlikely(curr->policy == SCHED_IDLE))
1869 goto preempt;
1870 1885
1871 if (!sched_feat(WAKEUP_PREEMPT)) 1886 if (!sched_feat(WAKEUP_PREEMPT))
1872 return; 1887 return;
@@ -1932,6 +1947,51 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
1932 } 1947 }
1933} 1948}
1934 1949
1950/*
1951 * sched_yield() is very simple
1952 *
1953 * The magic of dealing with the ->skip buddy is in pick_next_entity.
1954 */
1955static void yield_task_fair(struct rq *rq)
1956{
1957 struct task_struct *curr = rq->curr;
1958 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1959 struct sched_entity *se = &curr->se;
1960
1961 /*
1962 * Are we the only task in the tree?
1963 */
1964 if (unlikely(rq->nr_running == 1))
1965 return;
1966
1967 clear_buddies(cfs_rq, se);
1968
1969 if (curr->policy != SCHED_BATCH) {
1970 update_rq_clock(rq);
1971 /*
1972 * Update run-time statistics of the 'current'.
1973 */
1974 update_curr(cfs_rq);
1975 }
1976
1977 set_skip_buddy(se);
1978}
1979
1980static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
1981{
1982 struct sched_entity *se = &p->se;
1983
1984 if (!se->on_rq)
1985 return false;
1986
1987 /* Tell the scheduler that we'd really like pse to run next. */
1988 set_next_buddy(se);
1989
1990 yield_task_fair(rq);
1991
1992 return true;
1993}
1994
1935#ifdef CONFIG_SMP 1995#ifdef CONFIG_SMP
1936/************************************************** 1996/**************************************************
1937 * Fair scheduling class load-balancing methods: 1997 * Fair scheduling class load-balancing methods:
@@ -2123,7 +2183,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
2123 * We need to update shares after updating tg->load_weight in 2183 * We need to update shares after updating tg->load_weight in
2124 * order to adjust the weight of groups with long running tasks. 2184 * order to adjust the weight of groups with long running tasks.
2125 */ 2185 */
2126 update_cfs_shares(cfs_rq, 0); 2186 update_cfs_shares(cfs_rq);
2127 2187
2128 raw_spin_unlock_irqrestore(&rq->lock, flags); 2188 raw_spin_unlock_irqrestore(&rq->lock, flags);
2129 2189
@@ -2610,7 +2670,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
2610 * @this_cpu: Cpu for which load balance is currently performed. 2670 * @this_cpu: Cpu for which load balance is currently performed.
2611 * @idle: Idle status of this_cpu 2671 * @idle: Idle status of this_cpu
2612 * @load_idx: Load index of sched_domain of this_cpu for load calc. 2672 * @load_idx: Load index of sched_domain of this_cpu for load calc.
2613 * @sd_idle: Idle status of the sched_domain containing group.
2614 * @local_group: Does group contain this_cpu. 2673 * @local_group: Does group contain this_cpu.
2615 * @cpus: Set of cpus considered for load balancing. 2674 * @cpus: Set of cpus considered for load balancing.
2616 * @balance: Should we balance. 2675 * @balance: Should we balance.
@@ -2618,7 +2677,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
2618 */ 2677 */
2619static inline void update_sg_lb_stats(struct sched_domain *sd, 2678static inline void update_sg_lb_stats(struct sched_domain *sd,
2620 struct sched_group *group, int this_cpu, 2679 struct sched_group *group, int this_cpu,
2621 enum cpu_idle_type idle, int load_idx, int *sd_idle, 2680 enum cpu_idle_type idle, int load_idx,
2622 int local_group, const struct cpumask *cpus, 2681 int local_group, const struct cpumask *cpus,
2623 int *balance, struct sg_lb_stats *sgs) 2682 int *balance, struct sg_lb_stats *sgs)
2624{ 2683{
@@ -2638,9 +2697,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2638 for_each_cpu_and(i, sched_group_cpus(group), cpus) { 2697 for_each_cpu_and(i, sched_group_cpus(group), cpus) {
2639 struct rq *rq = cpu_rq(i); 2698 struct rq *rq = cpu_rq(i);
2640 2699
2641 if (*sd_idle && rq->nr_running)
2642 *sd_idle = 0;
2643
2644 /* Bias balancing toward cpus of our domain */ 2700 /* Bias balancing toward cpus of our domain */
2645 if (local_group) { 2701 if (local_group) {
2646 if (idle_cpu(i) && !first_idle_cpu) { 2702 if (idle_cpu(i) && !first_idle_cpu) {
@@ -2685,7 +2741,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2685 2741
2686 /* 2742 /*
2687 * Consider the group unbalanced when the imbalance is larger 2743 * Consider the group unbalanced when the imbalance is larger
2688 * than the average weight of two tasks. 2744 * than the average weight of a task.
2689 * 2745 *
2690 * APZ: with cgroup the avg task weight can vary wildly and 2746 * APZ: with cgroup the avg task weight can vary wildly and
2691 * might not be a suitable number - should we keep a 2747 * might not be a suitable number - should we keep a
@@ -2695,7 +2751,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2695 if (sgs->sum_nr_running) 2751 if (sgs->sum_nr_running)
2696 avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; 2752 avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
2697 2753
2698 if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1) 2754 if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
2699 sgs->group_imb = 1; 2755 sgs->group_imb = 1;
2700 2756
2701 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); 2757 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
@@ -2755,15 +2811,13 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
2755 * @sd: sched_domain whose statistics are to be updated. 2811 * @sd: sched_domain whose statistics are to be updated.
2756 * @this_cpu: Cpu for which load balance is currently performed. 2812 * @this_cpu: Cpu for which load balance is currently performed.
2757 * @idle: Idle status of this_cpu 2813 * @idle: Idle status of this_cpu
2758 * @sd_idle: Idle status of the sched_domain containing sg.
2759 * @cpus: Set of cpus considered for load balancing. 2814 * @cpus: Set of cpus considered for load balancing.
2760 * @balance: Should we balance. 2815 * @balance: Should we balance.
2761 * @sds: variable to hold the statistics for this sched_domain. 2816 * @sds: variable to hold the statistics for this sched_domain.
2762 */ 2817 */
2763static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, 2818static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
2764 enum cpu_idle_type idle, int *sd_idle, 2819 enum cpu_idle_type idle, const struct cpumask *cpus,
2765 const struct cpumask *cpus, int *balance, 2820 int *balance, struct sd_lb_stats *sds)
2766 struct sd_lb_stats *sds)
2767{ 2821{
2768 struct sched_domain *child = sd->child; 2822 struct sched_domain *child = sd->child;
2769 struct sched_group *sg = sd->groups; 2823 struct sched_group *sg = sd->groups;
@@ -2781,7 +2835,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
2781 2835
2782 local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg)); 2836 local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
2783 memset(&sgs, 0, sizeof(sgs)); 2837 memset(&sgs, 0, sizeof(sgs));
2784 update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle, 2838 update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
2785 local_group, cpus, balance, &sgs); 2839 local_group, cpus, balance, &sgs);
2786 2840
2787 if (local_group && !(*balance)) 2841 if (local_group && !(*balance))
@@ -3033,7 +3087,6 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
3033 * @imbalance: Variable which stores amount of weighted load which should 3087 * @imbalance: Variable which stores amount of weighted load which should
3034 * be moved to restore balance/put a group to idle. 3088 * be moved to restore balance/put a group to idle.
3035 * @idle: The idle status of this_cpu. 3089 * @idle: The idle status of this_cpu.
3036 * @sd_idle: The idleness of sd
3037 * @cpus: The set of CPUs under consideration for load-balancing. 3090 * @cpus: The set of CPUs under consideration for load-balancing.
3038 * @balance: Pointer to a variable indicating if this_cpu 3091 * @balance: Pointer to a variable indicating if this_cpu
3039 * is the appropriate cpu to perform load balancing at this_level. 3092 * is the appropriate cpu to perform load balancing at this_level.
@@ -3046,7 +3099,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
3046static struct sched_group * 3099static struct sched_group *
3047find_busiest_group(struct sched_domain *sd, int this_cpu, 3100find_busiest_group(struct sched_domain *sd, int this_cpu,
3048 unsigned long *imbalance, enum cpu_idle_type idle, 3101 unsigned long *imbalance, enum cpu_idle_type idle,
3049 int *sd_idle, const struct cpumask *cpus, int *balance) 3102 const struct cpumask *cpus, int *balance)
3050{ 3103{
3051 struct sd_lb_stats sds; 3104 struct sd_lb_stats sds;
3052 3105
@@ -3056,22 +3109,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3056 * Compute the various statistics relavent for load balancing at 3109 * Compute the various statistics relavent for load balancing at
3057 * this level. 3110 * this level.
3058 */ 3111 */
3059 update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus, 3112 update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);
3060 balance, &sds); 3113
3061 3114 /*
3062 /* Cases where imbalance does not exist from POV of this_cpu */ 3115 * this_cpu is not the appropriate cpu to perform load balancing at
3063 /* 1) this_cpu is not the appropriate cpu to perform load balancing 3116 * this level.
3064 * at this level.
3065 * 2) There is no busy sibling group to pull from.
3066 * 3) This group is the busiest group.
3067 * 4) This group is more busy than the avg busieness at this
3068 * sched_domain.
3069 * 5) The imbalance is within the specified limit.
3070 *
3071 * Note: when doing newidle balance, if the local group has excess
3072 * capacity (i.e. nr_running < group_capacity) and the busiest group
3073 * does not have any capacity, we force a load balance to pull tasks
3074 * to the local group. In this case, we skip past checks 3, 4 and 5.
3075 */ 3117 */
3076 if (!(*balance)) 3118 if (!(*balance))
3077 goto ret; 3119 goto ret;
@@ -3080,41 +3122,55 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3080 check_asym_packing(sd, &sds, this_cpu, imbalance)) 3122 check_asym_packing(sd, &sds, this_cpu, imbalance))
3081 return sds.busiest; 3123 return sds.busiest;
3082 3124
3125 /* There is no busy sibling group to pull tasks from */
3083 if (!sds.busiest || sds.busiest_nr_running == 0) 3126 if (!sds.busiest || sds.busiest_nr_running == 0)
3084 goto out_balanced; 3127 goto out_balanced;
3085 3128
3086 /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ 3129 /*
3130 * If the busiest group is imbalanced the below checks don't
3131 * work because they assumes all things are equal, which typically
3132 * isn't true due to cpus_allowed constraints and the like.
3133 */
3134 if (sds.group_imb)
3135 goto force_balance;
3136
3137 /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
3087 if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && 3138 if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
3088 !sds.busiest_has_capacity) 3139 !sds.busiest_has_capacity)
3089 goto force_balance; 3140 goto force_balance;
3090 3141
3142 /*
3143 * If the local group is more busy than the selected busiest group
3144 * don't try and pull any tasks.
3145 */
3091 if (sds.this_load >= sds.max_load) 3146 if (sds.this_load >= sds.max_load)
3092 goto out_balanced; 3147 goto out_balanced;
3093 3148
3149 /*
3150 * Don't pull any tasks if this group is already above the domain
3151 * average load.
3152 */
3094 sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; 3153 sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
3095
3096 if (sds.this_load >= sds.avg_load) 3154 if (sds.this_load >= sds.avg_load)
3097 goto out_balanced; 3155 goto out_balanced;
3098 3156
3099 /* 3157 if (idle == CPU_IDLE) {
3100 * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
3101 * And to check for busy balance use !idle_cpu instead of
3102 * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
3103 * even when they are idle.
3104 */
3105 if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
3106 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
3107 goto out_balanced;
3108 } else {
3109 /* 3158 /*
3110 * This cpu is idle. If the busiest group load doesn't 3159 * This cpu is idle. If the busiest group load doesn't
3111 * have more tasks than the number of available cpu's and 3160 * have more tasks than the number of available cpu's and
3112 * there is no imbalance between this and busiest group 3161 * there is no imbalance between this and busiest group
3113 * wrt to idle cpu's, it is balanced. 3162 * wrt to idle cpu's, it is balanced.
3114 */ 3163 */
3115 if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && 3164 if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
3116 sds.busiest_nr_running <= sds.busiest_group_weight) 3165 sds.busiest_nr_running <= sds.busiest_group_weight)
3117 goto out_balanced; 3166 goto out_balanced;
3167 } else {
3168 /*
3169 * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use
3170 * imbalance_pct to be conservative.
3171 */
3172 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
3173 goto out_balanced;
3118 } 3174 }
3119 3175
3120force_balance: 3176force_balance:
@@ -3193,7 +3249,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
3193/* Working cpumask for load_balance and load_balance_newidle. */ 3249/* Working cpumask for load_balance and load_balance_newidle. */
3194static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); 3250static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
3195 3251
3196static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle, 3252static int need_active_balance(struct sched_domain *sd, int idle,
3197 int busiest_cpu, int this_cpu) 3253 int busiest_cpu, int this_cpu)
3198{ 3254{
3199 if (idle == CPU_NEWLY_IDLE) { 3255 if (idle == CPU_NEWLY_IDLE) {
@@ -3225,10 +3281,6 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
3225 * move_tasks() will succeed. ld_moved will be true and this 3281 * move_tasks() will succeed. ld_moved will be true and this
3226 * active balance code will not be triggered. 3282 * active balance code will not be triggered.
3227 */ 3283 */
3228 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
3229 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3230 return 0;
3231
3232 if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) 3284 if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
3233 return 0; 3285 return 0;
3234 } 3286 }
@@ -3246,7 +3298,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3246 struct sched_domain *sd, enum cpu_idle_type idle, 3298 struct sched_domain *sd, enum cpu_idle_type idle,
3247 int *balance) 3299 int *balance)
3248{ 3300{
3249 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 3301 int ld_moved, all_pinned = 0, active_balance = 0;
3250 struct sched_group *group; 3302 struct sched_group *group;
3251 unsigned long imbalance; 3303 unsigned long imbalance;
3252 struct rq *busiest; 3304 struct rq *busiest;
@@ -3255,20 +3307,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3255 3307
3256 cpumask_copy(cpus, cpu_active_mask); 3308 cpumask_copy(cpus, cpu_active_mask);
3257 3309
3258 /*
3259 * When power savings policy is enabled for the parent domain, idle
3260 * sibling can pick up load irrespective of busy siblings. In this case,
3261 * let the state of idle sibling percolate up as CPU_IDLE, instead of
3262 * portraying it as CPU_NOT_IDLE.
3263 */
3264 if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
3265 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3266 sd_idle = 1;
3267
3268 schedstat_inc(sd, lb_count[idle]); 3310 schedstat_inc(sd, lb_count[idle]);
3269 3311
3270redo: 3312redo:
3271 group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, 3313 group = find_busiest_group(sd, this_cpu, &imbalance, idle,
3272 cpus, balance); 3314 cpus, balance);
3273 3315
3274 if (*balance == 0) 3316 if (*balance == 0)
@@ -3330,8 +3372,7 @@ redo:
3330 if (idle != CPU_NEWLY_IDLE) 3372 if (idle != CPU_NEWLY_IDLE)
3331 sd->nr_balance_failed++; 3373 sd->nr_balance_failed++;
3332 3374
3333 if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest), 3375 if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
3334 this_cpu)) {
3335 raw_spin_lock_irqsave(&busiest->lock, flags); 3376 raw_spin_lock_irqsave(&busiest->lock, flags);
3336 3377
3337 /* don't kick the active_load_balance_cpu_stop, 3378 /* don't kick the active_load_balance_cpu_stop,
@@ -3386,10 +3427,6 @@ redo:
3386 sd->balance_interval *= 2; 3427 sd->balance_interval *= 2;
3387 } 3428 }
3388 3429
3389 if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
3390 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3391 ld_moved = -1;
3392
3393 goto out; 3430 goto out;
3394 3431
3395out_balanced: 3432out_balanced:
@@ -3403,11 +3440,7 @@ out_one_pinned:
3403 (sd->balance_interval < sd->max_interval)) 3440 (sd->balance_interval < sd->max_interval))
3404 sd->balance_interval *= 2; 3441 sd->balance_interval *= 2;
3405 3442
3406 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 3443 ld_moved = 0;
3407 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
3408 ld_moved = -1;
3409 else
3410 ld_moved = 0;
3411out: 3444out:
3412 return ld_moved; 3445 return ld_moved;
3413} 3446}
@@ -3831,8 +3864,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3831 if (load_balance(cpu, rq, sd, idle, &balance)) { 3864 if (load_balance(cpu, rq, sd, idle, &balance)) {
3832 /* 3865 /*
3833 * We've pulled tasks over so either we're no 3866 * We've pulled tasks over so either we're no
3834 * longer idle, or one of our SMT siblings is 3867 * longer idle.
3835 * not idle.
3836 */ 3868 */
3837 idle = CPU_NOT_IDLE; 3869 idle = CPU_NOT_IDLE;
3838 } 3870 }
@@ -4079,33 +4111,62 @@ static void task_fork_fair(struct task_struct *p)
4079 * Priority of the task has changed. Check to see if we preempt 4111 * Priority of the task has changed. Check to see if we preempt
4080 * the current task. 4112 * the current task.
4081 */ 4113 */
4082static void prio_changed_fair(struct rq *rq, struct task_struct *p, 4114static void
4083 int oldprio, int running) 4115prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
4084{ 4116{
4117 if (!p->se.on_rq)
4118 return;
4119
4085 /* 4120 /*
4086 * Reschedule if we are currently running on this runqueue and 4121 * Reschedule if we are currently running on this runqueue and
4087 * our priority decreased, or if we are not currently running on 4122 * our priority decreased, or if we are not currently running on
4088 * this runqueue and our priority is higher than the current's 4123 * this runqueue and our priority is higher than the current's
4089 */ 4124 */
4090 if (running) { 4125 if (rq->curr == p) {
4091 if (p->prio > oldprio) 4126 if (p->prio > oldprio)
4092 resched_task(rq->curr); 4127 resched_task(rq->curr);
4093 } else 4128 } else
4094 check_preempt_curr(rq, p, 0); 4129 check_preempt_curr(rq, p, 0);
4095} 4130}
4096 4131
4132static void switched_from_fair(struct rq *rq, struct task_struct *p)
4133{
4134 struct sched_entity *se = &p->se;
4135 struct cfs_rq *cfs_rq = cfs_rq_of(se);
4136
4137 /*
4138 * Ensure the task's vruntime is normalized, so that when its
4139 * switched back to the fair class the enqueue_entity(.flags=0) will
4140 * do the right thing.
4141 *
4142 * If it was on_rq, then the dequeue_entity(.flags=0) will already
4143 * have normalized the vruntime, if it was !on_rq, then only when
4144 * the task is sleeping will it still have non-normalized vruntime.
4145 */
4146 if (!se->on_rq && p->state != TASK_RUNNING) {
4147 /*
4148 * Fix up our vruntime so that the current sleep doesn't
4149 * cause 'unlimited' sleep bonus.
4150 */
4151 place_entity(cfs_rq, se, 0);
4152 se->vruntime -= cfs_rq->min_vruntime;
4153 }
4154}
4155
4097/* 4156/*
4098 * We switched to the sched_fair class. 4157 * We switched to the sched_fair class.
4099 */ 4158 */
4100static void switched_to_fair(struct rq *rq, struct task_struct *p, 4159static void switched_to_fair(struct rq *rq, struct task_struct *p)
4101 int running)
4102{ 4160{
4161 if (!p->se.on_rq)
4162 return;
4163
4103 /* 4164 /*
4104 * We were most likely switched from sched_rt, so 4165 * We were most likely switched from sched_rt, so
4105 * kick off the schedule if running, otherwise just see 4166 * kick off the schedule if running, otherwise just see
4106 * if we can still preempt the current task. 4167 * if we can still preempt the current task.
4107 */ 4168 */
4108 if (running) 4169 if (rq->curr == p)
4109 resched_task(rq->curr); 4170 resched_task(rq->curr);
4110 else 4171 else
4111 check_preempt_curr(rq, p, 0); 4172 check_preempt_curr(rq, p, 0);
@@ -4171,6 +4232,7 @@ static const struct sched_class fair_sched_class = {
4171 .enqueue_task = enqueue_task_fair, 4232 .enqueue_task = enqueue_task_fair,
4172 .dequeue_task = dequeue_task_fair, 4233 .dequeue_task = dequeue_task_fair,
4173 .yield_task = yield_task_fair, 4234 .yield_task = yield_task_fair,
4235 .yield_to_task = yield_to_task_fair,
4174 4236
4175 .check_preempt_curr = check_preempt_wakeup, 4237 .check_preempt_curr = check_preempt_wakeup,
4176 4238
@@ -4191,6 +4253,7 @@ static const struct sched_class fair_sched_class = {
4191 .task_fork = task_fork_fair, 4253 .task_fork = task_fork_fair,
4192 4254
4193 .prio_changed = prio_changed_fair, 4255 .prio_changed = prio_changed_fair,
4256 .switched_from = switched_from_fair,
4194 .switched_to = switched_to_fair, 4257 .switched_to = switched_to_fair,
4195 4258
4196 .get_rr_interval = get_rr_interval_fair, 4259 .get_rr_interval = get_rr_interval_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 9fa0f402c87c..c82f26c1b7c3 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -52,31 +52,15 @@ static void set_curr_task_idle(struct rq *rq)
52{ 52{
53} 53}
54 54
55static void switched_to_idle(struct rq *rq, struct task_struct *p, 55static void switched_to_idle(struct rq *rq, struct task_struct *p)
56 int running)
57{ 56{
58 /* Can this actually happen?? */ 57 BUG();
59 if (running)
60 resched_task(rq->curr);
61 else
62 check_preempt_curr(rq, p, 0);
63} 58}
64 59
65static void prio_changed_idle(struct rq *rq, struct task_struct *p, 60static void
66 int oldprio, int running) 61prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
67{ 62{
68 /* This can happen for hot plug CPUS */ 63 BUG();
69
70 /*
71 * Reschedule if we are currently running on this runqueue and
72 * our priority decreased, or if we are not currently running on
73 * this runqueue and our priority is higher than the current's
74 */
75 if (running) {
76 if (p->prio > oldprio)
77 resched_task(rq->curr);
78 } else
79 check_preempt_curr(rq, p, 0);
80} 64}
81 65
82static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task) 66static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ad6267714c84..db308cb08b75 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -210,11 +210,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
210 210
211static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 211static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
212{ 212{
213 int this_cpu = smp_processor_id();
214 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; 213 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
215 struct sched_rt_entity *rt_se; 214 struct sched_rt_entity *rt_se;
216 215
217 rt_se = rt_rq->tg->rt_se[this_cpu]; 216 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
217
218 rt_se = rt_rq->tg->rt_se[cpu];
218 219
219 if (rt_rq->rt_nr_running) { 220 if (rt_rq->rt_nr_running) {
220 if (rt_se && !on_rt_rq(rt_se)) 221 if (rt_se && !on_rt_rq(rt_se))
@@ -226,10 +227,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
226 227
227static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 228static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
228{ 229{
229 int this_cpu = smp_processor_id();
230 struct sched_rt_entity *rt_se; 230 struct sched_rt_entity *rt_se;
231 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
231 232
232 rt_se = rt_rq->tg->rt_se[this_cpu]; 233 rt_se = rt_rq->tg->rt_se[cpu];
233 234
234 if (rt_se && on_rt_rq(rt_se)) 235 if (rt_se && on_rt_rq(rt_se))
235 dequeue_rt_entity(rt_se); 236 dequeue_rt_entity(rt_se);
@@ -565,8 +566,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
565 if (rt_rq->rt_time || rt_rq->rt_nr_running) 566 if (rt_rq->rt_time || rt_rq->rt_nr_running)
566 idle = 0; 567 idle = 0;
567 raw_spin_unlock(&rt_rq->rt_runtime_lock); 568 raw_spin_unlock(&rt_rq->rt_runtime_lock);
568 } else if (rt_rq->rt_nr_running) 569 } else if (rt_rq->rt_nr_running) {
569 idle = 0; 570 idle = 0;
571 if (!rt_rq_throttled(rt_rq))
572 enqueue = 1;
573 }
570 574
571 if (enqueue) 575 if (enqueue)
572 sched_rt_rq_enqueue(rt_rq); 576 sched_rt_rq_enqueue(rt_rq);
@@ -1595,8 +1599,7 @@ static void rq_offline_rt(struct rq *rq)
1595 * When switch from the rt queue, we bring ourselves to a position 1599 * When switch from the rt queue, we bring ourselves to a position
1596 * that we might want to pull RT tasks from other runqueues. 1600 * that we might want to pull RT tasks from other runqueues.
1597 */ 1601 */
1598static void switched_from_rt(struct rq *rq, struct task_struct *p, 1602static void switched_from_rt(struct rq *rq, struct task_struct *p)
1599 int running)
1600{ 1603{
1601 /* 1604 /*
1602 * If there are other RT tasks then we will reschedule 1605 * If there are other RT tasks then we will reschedule
@@ -1605,7 +1608,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
1605 * we may need to handle the pulling of RT tasks 1608 * we may need to handle the pulling of RT tasks
1606 * now. 1609 * now.
1607 */ 1610 */
1608 if (!rq->rt.rt_nr_running) 1611 if (p->se.on_rq && !rq->rt.rt_nr_running)
1609 pull_rt_task(rq); 1612 pull_rt_task(rq);
1610} 1613}
1611 1614
@@ -1624,8 +1627,7 @@ static inline void init_sched_rt_class(void)
1624 * with RT tasks. In this case we try to push them off to 1627 * with RT tasks. In this case we try to push them off to
1625 * other runqueues. 1628 * other runqueues.
1626 */ 1629 */
1627static void switched_to_rt(struct rq *rq, struct task_struct *p, 1630static void switched_to_rt(struct rq *rq, struct task_struct *p)
1628 int running)
1629{ 1631{
1630 int check_resched = 1; 1632 int check_resched = 1;
1631 1633
@@ -1636,7 +1638,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
1636 * If that current running task is also an RT task 1638 * If that current running task is also an RT task
1637 * then see if we can move to another run queue. 1639 * then see if we can move to another run queue.
1638 */ 1640 */
1639 if (!running) { 1641 if (p->se.on_rq && rq->curr != p) {
1640#ifdef CONFIG_SMP 1642#ifdef CONFIG_SMP
1641 if (rq->rt.overloaded && push_rt_task(rq) && 1643 if (rq->rt.overloaded && push_rt_task(rq) &&
1642 /* Don't resched if we changed runqueues */ 1644 /* Don't resched if we changed runqueues */
@@ -1652,10 +1654,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
1652 * Priority of the task has changed. This may cause 1654 * Priority of the task has changed. This may cause
1653 * us to initiate a push or pull. 1655 * us to initiate a push or pull.
1654 */ 1656 */
1655static void prio_changed_rt(struct rq *rq, struct task_struct *p, 1657static void
1656 int oldprio, int running) 1658prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
1657{ 1659{
1658 if (running) { 1660 if (!p->se.on_rq)
1661 return;
1662
1663 if (rq->curr == p) {
1659#ifdef CONFIG_SMP 1664#ifdef CONFIG_SMP
1660 /* 1665 /*
1661 * If our priority decreases while running, we 1666 * If our priority decreases while running, we
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 2bf6b47058c1..84ec9bcf82d9 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq)
59{ 59{
60} 60}
61 61
62static void switched_to_stop(struct rq *rq, struct task_struct *p, 62static void switched_to_stop(struct rq *rq, struct task_struct *p)
63 int running)
64{ 63{
65 BUG(); /* its impossible to change to this class */ 64 BUG(); /* its impossible to change to this class */
66} 65}
67 66
68static void prio_changed_stop(struct rq *rq, struct task_struct *p, 67static void
69 int oldprio, int running) 68prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
70{ 69{
71 BUG(); /* how!?, what priority? */ 70 BUG(); /* how!?, what priority? */
72} 71}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 68eb5efec388..56e5dec837f0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat);
54 54
55static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; 55static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56 56
57static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); 57DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58 58
59char *softirq_to_name[NR_SOFTIRQS] = { 59char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", 60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -311,9 +311,21 @@ void irq_enter(void)
311} 311}
312 312
313#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 313#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
314# define invoke_softirq() __do_softirq() 314static inline void invoke_softirq(void)
315{
316 if (!force_irqthreads)
317 __do_softirq();
318 else
319 wakeup_softirqd();
320}
315#else 321#else
316# define invoke_softirq() do_softirq() 322static inline void invoke_softirq(void)
323{
324 if (!force_irqthreads)
325 do_softirq();
326 else
327 wakeup_softirqd();
328}
317#endif 329#endif
318 330
319/* 331/*
@@ -721,7 +733,6 @@ static int run_ksoftirqd(void * __bind_cpu)
721{ 733{
722 set_current_state(TASK_INTERRUPTIBLE); 734 set_current_state(TASK_INTERRUPTIBLE);
723 735
724 current->flags |= PF_KSOFTIRQD;
725 while (!kthread_should_stop()) { 736 while (!kthread_should_stop()) {
726 preempt_disable(); 737 preempt_disable();
727 if (!local_softirq_pending()) { 738 if (!local_softirq_pending()) {
@@ -738,7 +749,10 @@ static int run_ksoftirqd(void * __bind_cpu)
738 don't process */ 749 don't process */
739 if (cpu_is_offline((long)__bind_cpu)) 750 if (cpu_is_offline((long)__bind_cpu))
740 goto wait_to_die; 751 goto wait_to_die;
741 do_softirq(); 752 local_irq_disable();
753 if (local_softirq_pending())
754 __do_softirq();
755 local_irq_enable();
742 preempt_enable_no_resched(); 756 preempt_enable_no_resched();
743 cond_resched(); 757 cond_resched();
744 preempt_disable(); 758 preempt_disable();
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c782fe9924c7..25cc41cd8f33 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,3 +186,8 @@ cond_syscall(sys_perf_event_open);
186/* fanotify! */ 186/* fanotify! */
187cond_syscall(sys_fanotify_init); 187cond_syscall(sys_fanotify_init);
188cond_syscall(sys_fanotify_mark); 188cond_syscall(sys_fanotify_mark);
189
190/* open by handle */
191cond_syscall(sys_name_to_handle_at);
192cond_syscall(sys_open_by_handle_at);
193cond_syscall(compat_sys_open_by_handle_at);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83db985..51054fea5d99 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -194,9 +194,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
194static struct ctl_table root_table[]; 194static struct ctl_table root_table[];
195static struct ctl_table_root sysctl_table_root; 195static struct ctl_table_root sysctl_table_root;
196static struct ctl_table_header root_table_header = { 196static struct ctl_table_header root_table_header = {
197 .count = 1, 197 {{.count = 1,
198 .ctl_table = root_table, 198 .ctl_table = root_table,
199 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), 199 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
200 .root = &sysctl_table_root, 200 .root = &sysctl_table_root,
201 .set = &sysctl_table_root.default_set, 201 .set = &sysctl_table_root.default_set,
202}; 202};
@@ -361,20 +361,13 @@ static struct ctl_table kern_table[] = {
361 .mode = 0644, 361 .mode = 0644,
362 .proc_handler = sched_rt_handler, 362 .proc_handler = sched_rt_handler,
363 }, 363 },
364 {
365 .procname = "sched_compat_yield",
366 .data = &sysctl_sched_compat_yield,
367 .maxlen = sizeof(unsigned int),
368 .mode = 0644,
369 .proc_handler = proc_dointvec,
370 },
371#ifdef CONFIG_SCHED_AUTOGROUP 364#ifdef CONFIG_SCHED_AUTOGROUP
372 { 365 {
373 .procname = "sched_autogroup_enabled", 366 .procname = "sched_autogroup_enabled",
374 .data = &sysctl_sched_autogroup_enabled, 367 .data = &sysctl_sched_autogroup_enabled,
375 .maxlen = sizeof(unsigned int), 368 .maxlen = sizeof(unsigned int),
376 .mode = 0644, 369 .mode = 0644,
377 .proc_handler = proc_dointvec, 370 .proc_handler = proc_dointvec_minmax,
378 .extra1 = &zero, 371 .extra1 = &zero,
379 .extra2 = &one, 372 .extra2 = &one,
380 }, 373 },
@@ -948,7 +941,7 @@ static struct ctl_table kern_table[] = {
948 .data = &sysctl_perf_event_sample_rate, 941 .data = &sysctl_perf_event_sample_rate,
949 .maxlen = sizeof(sysctl_perf_event_sample_rate), 942 .maxlen = sizeof(sysctl_perf_event_sample_rate),
950 .mode = 0644, 943 .mode = 0644,
951 .proc_handler = proc_dointvec, 944 .proc_handler = perf_proc_update_handler,
952 }, 945 },
953#endif 946#endif
954#ifdef CONFIG_KMEMCHECK 947#ifdef CONFIG_KMEMCHECK
@@ -1567,11 +1560,16 @@ void sysctl_head_get(struct ctl_table_header *head)
1567 spin_unlock(&sysctl_lock); 1560 spin_unlock(&sysctl_lock);
1568} 1561}
1569 1562
1563static void free_head(struct rcu_head *rcu)
1564{
1565 kfree(container_of(rcu, struct ctl_table_header, rcu));
1566}
1567
1570void sysctl_head_put(struct ctl_table_header *head) 1568void sysctl_head_put(struct ctl_table_header *head)
1571{ 1569{
1572 spin_lock(&sysctl_lock); 1570 spin_lock(&sysctl_lock);
1573 if (!--head->count) 1571 if (!--head->count)
1574 kfree(head); 1572 call_rcu(&head->rcu, free_head);
1575 spin_unlock(&sysctl_lock); 1573 spin_unlock(&sysctl_lock);
1576} 1574}
1577 1575
@@ -1948,10 +1946,10 @@ void unregister_sysctl_table(struct ctl_table_header * header)
1948 start_unregistering(header); 1946 start_unregistering(header);
1949 if (!--header->parent->count) { 1947 if (!--header->parent->count) {
1950 WARN_ON(1); 1948 WARN_ON(1);
1951 kfree(header->parent); 1949 call_rcu(&header->parent->rcu, free_head);
1952 } 1950 }
1953 if (!--header->count) 1951 if (!--header->count)
1954 kfree(header); 1952 call_rcu(&header->rcu, free_head);
1955 spin_unlock(&sysctl_lock); 1953 spin_unlock(&sysctl_lock);
1956} 1954}
1957 1955
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b875bedf7c9a..3b8e028b9601 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) 1321 void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
1322{ 1322{
1323 const struct bin_table *table = NULL; 1323 const struct bin_table *table = NULL;
1324 struct nameidata nd;
1325 struct vfsmount *mnt; 1324 struct vfsmount *mnt;
1326 struct file *file; 1325 struct file *file;
1327 ssize_t result; 1326 ssize_t result;
1328 char *pathname; 1327 char *pathname;
1329 int flags; 1328 int flags;
1330 int acc_mode;
1331 1329
1332 pathname = sysctl_getname(name, nlen, &table); 1330 pathname = sysctl_getname(name, nlen, &table);
1333 result = PTR_ERR(pathname); 1331 result = PTR_ERR(pathname);
@@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1337 /* How should the sysctl be accessed? */ 1335 /* How should the sysctl be accessed? */
1338 if (oldval && oldlen && newval && newlen) { 1336 if (oldval && oldlen && newval && newlen) {
1339 flags = O_RDWR; 1337 flags = O_RDWR;
1340 acc_mode = MAY_READ | MAY_WRITE;
1341 } else if (newval && newlen) { 1338 } else if (newval && newlen) {
1342 flags = O_WRONLY; 1339 flags = O_WRONLY;
1343 acc_mode = MAY_WRITE;
1344 } else if (oldval && oldlen) { 1340 } else if (oldval && oldlen) {
1345 flags = O_RDONLY; 1341 flags = O_RDONLY;
1346 acc_mode = MAY_READ;
1347 } else { 1342 } else {
1348 result = 0; 1343 result = 0;
1349 goto out_putname; 1344 goto out_putname;
1350 } 1345 }
1351 1346
1352 mnt = current->nsproxy->pid_ns->proc_mnt; 1347 mnt = current->nsproxy->pid_ns->proc_mnt;
1353 result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd); 1348 file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
1354 if (result)
1355 goto out_putname;
1356
1357 result = may_open(&nd.path, acc_mode, flags);
1358 if (result)
1359 goto out_putpath;
1360
1361 file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
1362 result = PTR_ERR(file); 1349 result = PTR_ERR(file);
1363 if (IS_ERR(file)) 1350 if (IS_ERR(file))
1364 goto out_putname; 1351 goto out_putname;
@@ -1370,10 +1357,6 @@ out_putname:
1370 putname(pathname); 1357 putname(pathname);
1371out: 1358out:
1372 return result; 1359 return result;
1373
1374out_putpath:
1375 path_put(&nd.path);
1376 goto out_putname;
1377} 1360}
1378 1361
1379 1362
diff --git a/kernel/time.c b/kernel/time.c
index 32174359576f..8e8dc6d705c9 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -150,7 +150,7 @@ static inline void warp_clock(void)
150 * various programs will get confused when the clock gets warped. 150 * various programs will get confused when the clock gets warped.
151 */ 151 */
152 152
153int do_sys_settimeofday(struct timespec *tv, struct timezone *tz) 153int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
154{ 154{
155 static int firsttime = 1; 155 static int firsttime = 1;
156 int error = 0; 156 int error = 0;
@@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x)
645} 645}
646 646
647/** 647/**
648 * nsecs_to_jiffies - Convert nsecs in u64 to jiffies 648 * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
649 * 649 *
650 * @n: nsecs in u64 650 * @n: nsecs in u64
651 * 651 *
@@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x)
657 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) 657 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
658 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years 658 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
659 */ 659 */
660unsigned long nsecs_to_jiffies(u64 n) 660u64 nsecs_to_jiffies64(u64 n)
661{ 661{
662#if (NSEC_PER_SEC % HZ) == 0 662#if (NSEC_PER_SEC % HZ) == 0
663 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ 663 /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
@@ -674,22 +674,23 @@ unsigned long nsecs_to_jiffies(u64 n)
674#endif 674#endif
675} 675}
676 676
677#if (BITS_PER_LONG < 64) 677/**
678u64 get_jiffies_64(void) 678 * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
679 *
680 * @n: nsecs in u64
681 *
682 * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
683 * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
684 * for scheduler, not for use in device drivers to calculate timeout value.
685 *
686 * note:
687 * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
688 * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
689 */
690unsigned long nsecs_to_jiffies(u64 n)
679{ 691{
680 unsigned long seq; 692 return (unsigned long)nsecs_to_jiffies64(n);
681 u64 ret;
682
683 do {
684 seq = read_seqbegin(&xtime_lock);
685 ret = jiffies_64;
686 } while (read_seqretry(&xtime_lock, seq));
687 return ret;
688} 693}
689EXPORT_SYMBOL(get_jiffies_64);
690#endif
691
692EXPORT_SYMBOL(jiffies);
693 694
694/* 695/*
695 * Add two timespec values and do a safety check for overflow. 696 * Add two timespec values and do a safety check for overflow.
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ee266620b06c..b0425991e9ac 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,5 @@
1obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o 1obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
2obj-y += timeconv.o posix-clock.o
2 3
3obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o 4obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
4obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o 5obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d7395fdfb9f3..0d74b9ba90c8 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,7 +18,6 @@
18#include <linux/notifier.h> 18#include <linux/notifier.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/sysdev.h> 20#include <linux/sysdev.h>
21#include <linux/tick.h>
22 21
23#include "tick-internal.h" 22#include "tick-internal.h"
24 23
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 5404a8456909..b2fa506667c0 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -22,8 +22,11 @@
22************************************************************************/ 22************************************************************************/
23#include <linux/clocksource.h> 23#include <linux/clocksource.h>
24#include <linux/jiffies.h> 24#include <linux/jiffies.h>
25#include <linux/module.h>
25#include <linux/init.h> 26#include <linux/init.h>
26 27
28#include "tick-internal.h"
29
27/* The Jiffies based clocksource is the lowest common 30/* The Jiffies based clocksource is the lowest common
28 * denominator clock source which should function on 31 * denominator clock source which should function on
29 * all systems. It has the same coarse resolution as 32 * all systems. It has the same coarse resolution as
@@ -64,6 +67,23 @@ struct clocksource clocksource_jiffies = {
64 .shift = JIFFIES_SHIFT, 67 .shift = JIFFIES_SHIFT,
65}; 68};
66 69
70#if (BITS_PER_LONG < 64)
71u64 get_jiffies_64(void)
72{
73 unsigned long seq;
74 u64 ret;
75
76 do {
77 seq = read_seqbegin(&xtime_lock);
78 ret = jiffies_64;
79 } while (read_seqretry(&xtime_lock, seq));
80 return ret;
81}
82EXPORT_SYMBOL(get_jiffies_64);
83#endif
84
85EXPORT_SYMBOL(jiffies);
86
67static int __init init_jiffies_clocksource(void) 87static int __init init_jiffies_clocksource(void)
68{ 88{
69 return clocksource_register(&clocksource_jiffies); 89 return clocksource_register(&clocksource_jiffies);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5c00242fa921..5f1bb8e2008f 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -16,6 +16,8 @@
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/module.h> 17#include <linux/module.h>
18 18
19#include "tick-internal.h"
20
19/* 21/*
20 * NTP timekeeping variables: 22 * NTP timekeeping variables:
21 */ 23 */
@@ -646,6 +648,17 @@ int do_adjtimex(struct timex *txc)
646 hrtimer_cancel(&leap_timer); 648 hrtimer_cancel(&leap_timer);
647 } 649 }
648 650
651 if (txc->modes & ADJ_SETOFFSET) {
652 struct timespec delta;
653 delta.tv_sec = txc->time.tv_sec;
654 delta.tv_nsec = txc->time.tv_usec;
655 if (!(txc->modes & ADJ_NANO))
656 delta.tv_nsec *= 1000;
657 result = timekeeping_inject_offset(&delta);
658 if (result)
659 return result;
660 }
661
649 getnstimeofday(&ts); 662 getnstimeofday(&ts);
650 663
651 write_seqlock_irq(&xtime_lock); 664 write_seqlock_irq(&xtime_lock);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
new file mode 100644
index 000000000000..25028dd4fa18
--- /dev/null
+++ b/kernel/time/posix-clock.c
@@ -0,0 +1,451 @@
1/*
2 * posix-clock.c - support for dynamic clock devices
3 *
4 * Copyright (C) 2010 OMICRON electronics GmbH
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#include <linux/device.h>
21#include <linux/file.h>
22#include <linux/mutex.h>
23#include <linux/posix-clock.h>
24#include <linux/slab.h>
25#include <linux/syscalls.h>
26#include <linux/uaccess.h>
27
28static void delete_clock(struct kref *kref);
29
30/*
31 * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
32 */
33static struct posix_clock *get_posix_clock(struct file *fp)
34{
35 struct posix_clock *clk = fp->private_data;
36
37 mutex_lock(&clk->mutex);
38
39 if (!clk->zombie)
40 return clk;
41
42 mutex_unlock(&clk->mutex);
43
44 return NULL;
45}
46
47static void put_posix_clock(struct posix_clock *clk)
48{
49 mutex_unlock(&clk->mutex);
50}
51
52static ssize_t posix_clock_read(struct file *fp, char __user *buf,
53 size_t count, loff_t *ppos)
54{
55 struct posix_clock *clk = get_posix_clock(fp);
56 int err = -EINVAL;
57
58 if (!clk)
59 return -ENODEV;
60
61 if (clk->ops.read)
62 err = clk->ops.read(clk, fp->f_flags, buf, count);
63
64 put_posix_clock(clk);
65
66 return err;
67}
68
69static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
70{
71 struct posix_clock *clk = get_posix_clock(fp);
72 int result = 0;
73
74 if (!clk)
75 return -ENODEV;
76
77 if (clk->ops.poll)
78 result = clk->ops.poll(clk, fp, wait);
79
80 put_posix_clock(clk);
81
82 return result;
83}
84
85static int posix_clock_fasync(int fd, struct file *fp, int on)
86{
87 struct posix_clock *clk = get_posix_clock(fp);
88 int err = 0;
89
90 if (!clk)
91 return -ENODEV;
92
93 if (clk->ops.fasync)
94 err = clk->ops.fasync(clk, fd, fp, on);
95
96 put_posix_clock(clk);
97
98 return err;
99}
100
101static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma)
102{
103 struct posix_clock *clk = get_posix_clock(fp);
104 int err = -ENODEV;
105
106 if (!clk)
107 return -ENODEV;
108
109 if (clk->ops.mmap)
110 err = clk->ops.mmap(clk, vma);
111
112 put_posix_clock(clk);
113
114 return err;
115}
116
117static long posix_clock_ioctl(struct file *fp,
118 unsigned int cmd, unsigned long arg)
119{
120 struct posix_clock *clk = get_posix_clock(fp);
121 int err = -ENOTTY;
122
123 if (!clk)
124 return -ENODEV;
125
126 if (clk->ops.ioctl)
127 err = clk->ops.ioctl(clk, cmd, arg);
128
129 put_posix_clock(clk);
130
131 return err;
132}
133
134#ifdef CONFIG_COMPAT
135static long posix_clock_compat_ioctl(struct file *fp,
136 unsigned int cmd, unsigned long arg)
137{
138 struct posix_clock *clk = get_posix_clock(fp);
139 int err = -ENOTTY;
140
141 if (!clk)
142 return -ENODEV;
143
144 if (clk->ops.ioctl)
145 err = clk->ops.ioctl(clk, cmd, arg);
146
147 put_posix_clock(clk);
148
149 return err;
150}
151#endif
152
153static int posix_clock_open(struct inode *inode, struct file *fp)
154{
155 int err;
156 struct posix_clock *clk =
157 container_of(inode->i_cdev, struct posix_clock, cdev);
158
159 mutex_lock(&clk->mutex);
160
161 if (clk->zombie) {
162 err = -ENODEV;
163 goto out;
164 }
165 if (clk->ops.open)
166 err = clk->ops.open(clk, fp->f_mode);
167 else
168 err = 0;
169
170 if (!err) {
171 kref_get(&clk->kref);
172 fp->private_data = clk;
173 }
174out:
175 mutex_unlock(&clk->mutex);
176 return err;
177}
178
179static int posix_clock_release(struct inode *inode, struct file *fp)
180{
181 struct posix_clock *clk = fp->private_data;
182 int err = 0;
183
184 if (clk->ops.release)
185 err = clk->ops.release(clk);
186
187 kref_put(&clk->kref, delete_clock);
188
189 fp->private_data = NULL;
190
191 return err;
192}
193
194static const struct file_operations posix_clock_file_operations = {
195 .owner = THIS_MODULE,
196 .llseek = no_llseek,
197 .read = posix_clock_read,
198 .poll = posix_clock_poll,
199 .unlocked_ioctl = posix_clock_ioctl,
200 .open = posix_clock_open,
201 .release = posix_clock_release,
202 .fasync = posix_clock_fasync,
203 .mmap = posix_clock_mmap,
204#ifdef CONFIG_COMPAT
205 .compat_ioctl = posix_clock_compat_ioctl,
206#endif
207};
208
209int posix_clock_register(struct posix_clock *clk, dev_t devid)
210{
211 int err;
212
213 kref_init(&clk->kref);
214 mutex_init(&clk->mutex);
215
216 cdev_init(&clk->cdev, &posix_clock_file_operations);
217 clk->cdev.owner = clk->ops.owner;
218 err = cdev_add(&clk->cdev, devid, 1);
219 if (err)
220 goto no_cdev;
221
222 return err;
223no_cdev:
224 mutex_destroy(&clk->mutex);
225 return err;
226}
227EXPORT_SYMBOL_GPL(posix_clock_register);
228
229static void delete_clock(struct kref *kref)
230{
231 struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
232 mutex_destroy(&clk->mutex);
233 if (clk->release)
234 clk->release(clk);
235}
236
237void posix_clock_unregister(struct posix_clock *clk)
238{
239 cdev_del(&clk->cdev);
240
241 mutex_lock(&clk->mutex);
242 clk->zombie = true;
243 mutex_unlock(&clk->mutex);
244
245 kref_put(&clk->kref, delete_clock);
246}
247EXPORT_SYMBOL_GPL(posix_clock_unregister);
248
249struct posix_clock_desc {
250 struct file *fp;
251 struct posix_clock *clk;
252};
253
254static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
255{
256 struct file *fp = fget(CLOCKID_TO_FD(id));
257 int err = -EINVAL;
258
259 if (!fp)
260 return err;
261
262 if (fp->f_op->open != posix_clock_open || !fp->private_data)
263 goto out;
264
265 cd->fp = fp;
266 cd->clk = get_posix_clock(fp);
267
268 err = cd->clk ? 0 : -ENODEV;
269out:
270 if (err)
271 fput(fp);
272 return err;
273}
274
275static void put_clock_desc(struct posix_clock_desc *cd)
276{
277 put_posix_clock(cd->clk);
278 fput(cd->fp);
279}
280
281static int pc_clock_adjtime(clockid_t id, struct timex *tx)
282{
283 struct posix_clock_desc cd;
284 int err;
285
286 err = get_clock_desc(id, &cd);
287 if (err)
288 return err;
289
290 if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
291 err = -EACCES;
292 goto out;
293 }
294
295 if (cd.clk->ops.clock_adjtime)
296 err = cd.clk->ops.clock_adjtime(cd.clk, tx);
297 else
298 err = -EOPNOTSUPP;
299out:
300 put_clock_desc(&cd);
301
302 return err;
303}
304
305static int pc_clock_gettime(clockid_t id, struct timespec *ts)
306{
307 struct posix_clock_desc cd;
308 int err;
309
310 err = get_clock_desc(id, &cd);
311 if (err)
312 return err;
313
314 if (cd.clk->ops.clock_gettime)
315 err = cd.clk->ops.clock_gettime(cd.clk, ts);
316 else
317 err = -EOPNOTSUPP;
318
319 put_clock_desc(&cd);
320
321 return err;
322}
323
324static int pc_clock_getres(clockid_t id, struct timespec *ts)
325{
326 struct posix_clock_desc cd;
327 int err;
328
329 err = get_clock_desc(id, &cd);
330 if (err)
331 return err;
332
333 if (cd.clk->ops.clock_getres)
334 err = cd.clk->ops.clock_getres(cd.clk, ts);
335 else
336 err = -EOPNOTSUPP;
337
338 put_clock_desc(&cd);
339
340 return err;
341}
342
343static int pc_clock_settime(clockid_t id, const struct timespec *ts)
344{
345 struct posix_clock_desc cd;
346 int err;
347
348 err = get_clock_desc(id, &cd);
349 if (err)
350 return err;
351
352 if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
353 err = -EACCES;
354 goto out;
355 }
356
357 if (cd.clk->ops.clock_settime)
358 err = cd.clk->ops.clock_settime(cd.clk, ts);
359 else
360 err = -EOPNOTSUPP;
361out:
362 put_clock_desc(&cd);
363
364 return err;
365}
366
367static int pc_timer_create(struct k_itimer *kit)
368{
369 clockid_t id = kit->it_clock;
370 struct posix_clock_desc cd;
371 int err;
372
373 err = get_clock_desc(id, &cd);
374 if (err)
375 return err;
376
377 if (cd.clk->ops.timer_create)
378 err = cd.clk->ops.timer_create(cd.clk, kit);
379 else
380 err = -EOPNOTSUPP;
381
382 put_clock_desc(&cd);
383
384 return err;
385}
386
387static int pc_timer_delete(struct k_itimer *kit)
388{
389 clockid_t id = kit->it_clock;
390 struct posix_clock_desc cd;
391 int err;
392
393 err = get_clock_desc(id, &cd);
394 if (err)
395 return err;
396
397 if (cd.clk->ops.timer_delete)
398 err = cd.clk->ops.timer_delete(cd.clk, kit);
399 else
400 err = -EOPNOTSUPP;
401
402 put_clock_desc(&cd);
403
404 return err;
405}
406
407static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts)
408{
409 clockid_t id = kit->it_clock;
410 struct posix_clock_desc cd;
411
412 if (get_clock_desc(id, &cd))
413 return;
414
415 if (cd.clk->ops.timer_gettime)
416 cd.clk->ops.timer_gettime(cd.clk, kit, ts);
417
418 put_clock_desc(&cd);
419}
420
421static int pc_timer_settime(struct k_itimer *kit, int flags,
422 struct itimerspec *ts, struct itimerspec *old)
423{
424 clockid_t id = kit->it_clock;
425 struct posix_clock_desc cd;
426 int err;
427
428 err = get_clock_desc(id, &cd);
429 if (err)
430 return err;
431
432 if (cd.clk->ops.timer_settime)
433 err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old);
434 else
435 err = -EOPNOTSUPP;
436
437 put_clock_desc(&cd);
438
439 return err;
440}
441
442struct k_clock clock_posix_dynamic = {
443 .clock_getres = pc_clock_getres,
444 .clock_set = pc_clock_settime,
445 .clock_get = pc_clock_gettime,
446 .clock_adj = pc_clock_adjtime,
447 .timer_create = pc_timer_create,
448 .timer_set = pc_timer_settime,
449 .timer_del = pc_timer_delete,
450 .timer_get = pc_timer_gettime,
451};
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 48b2761b5668..da800ffa810c 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -18,7 +18,6 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/tick.h>
22 21
23#include "tick-internal.h" 22#include "tick-internal.h"
24 23
@@ -600,4 +599,14 @@ int tick_broadcast_oneshot_active(void)
600 return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; 599 return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
601} 600}
602 601
602/*
603 * Check whether the broadcast device supports oneshot.
604 */
605bool tick_broadcast_oneshot_available(void)
606{
607 struct clock_event_device *bc = tick_broadcast_device.evtdev;
608
609 return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
610}
611
603#endif 612#endif
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 051bc80a0c43..119528de8235 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -18,7 +18,6 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/tick.h>
22 21
23#include <asm/irq_regs.h> 22#include <asm/irq_regs.h>
24 23
@@ -51,7 +50,11 @@ int tick_is_oneshot_available(void)
51{ 50{
52 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 51 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
53 52
54 return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); 53 if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
54 return 0;
55 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
56 return 1;
57 return tick_broadcast_oneshot_available();
55} 58}
56 59
57/* 60/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 290eefbc1f60..1009b06d6f89 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -1,6 +1,10 @@
1/* 1/*
2 * tick internal variable and functions used by low/high res code 2 * tick internal variable and functions used by low/high res code
3 */ 3 */
4#include <linux/hrtimer.h>
5#include <linux/tick.h>
6
7#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
4 8
5#define TICK_DO_TIMER_NONE -1 9#define TICK_DO_TIMER_NONE -1
6#define TICK_DO_TIMER_BOOT -2 10#define TICK_DO_TIMER_BOOT -2
@@ -36,6 +40,7 @@ extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
36extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); 40extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
37extern int tick_broadcast_oneshot_active(void); 41extern int tick_broadcast_oneshot_active(void);
38extern void tick_check_oneshot_broadcast(int cpu); 42extern void tick_check_oneshot_broadcast(int cpu);
43bool tick_broadcast_oneshot_available(void);
39# else /* BROADCAST */ 44# else /* BROADCAST */
40static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) 45static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
41{ 46{
@@ -46,6 +51,7 @@ static inline void tick_broadcast_switch_to_oneshot(void) { }
46static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } 51static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
47static inline int tick_broadcast_oneshot_active(void) { return 0; } 52static inline int tick_broadcast_oneshot_active(void) { return 0; }
48static inline void tick_check_oneshot_broadcast(int cpu) { } 53static inline void tick_check_oneshot_broadcast(int cpu) { }
54static inline bool tick_broadcast_oneshot_available(void) { return true; }
49# endif /* !BROADCAST */ 55# endif /* !BROADCAST */
50 56
51#else /* !ONESHOT */ 57#else /* !ONESHOT */
@@ -76,6 +82,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
76 return 0; 82 return 0;
77} 83}
78static inline int tick_broadcast_oneshot_active(void) { return 0; } 84static inline int tick_broadcast_oneshot_active(void) { return 0; }
85static inline bool tick_broadcast_oneshot_available(void) { return false; }
79#endif /* !TICK_ONESHOT */ 86#endif /* !TICK_ONESHOT */
80 87
81/* 88/*
@@ -132,3 +139,8 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
132{ 139{
133 return !(dev->features & CLOCK_EVT_FEAT_DUMMY); 140 return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
134} 141}
142
143#endif
144
145extern void do_timer(unsigned long ticks);
146extern seqlock_t xtime_lock;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 5cbc101f908b..2d04411a5f05 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -18,7 +18,6 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/profile.h> 19#include <linux/profile.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/tick.h>
22 21
23#include "tick-internal.h" 22#include "tick-internal.h"
24 23
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c55ea2433471..d5097c44b407 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -19,7 +19,6 @@
19#include <linux/percpu.h> 19#include <linux/percpu.h>
20#include <linux/profile.h> 20#include <linux/profile.h>
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/tick.h>
23#include <linux/module.h> 22#include <linux/module.h>
24 23
25#include <asm/irq_regs.h> 24#include <asm/irq_regs.h>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d27c7562902c..3bd7e3d5c632 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -353,7 +353,7 @@ EXPORT_SYMBOL(do_gettimeofday);
353 * 353 *
354 * Sets the time of day to the new time and update NTP and notify hrtimers 354 * Sets the time of day to the new time and update NTP and notify hrtimers
355 */ 355 */
356int do_settimeofday(struct timespec *tv) 356int do_settimeofday(const struct timespec *tv)
357{ 357{
358 struct timespec ts_delta; 358 struct timespec ts_delta;
359 unsigned long flags; 359 unsigned long flags;
@@ -387,6 +387,42 @@ int do_settimeofday(struct timespec *tv)
387 387
388EXPORT_SYMBOL(do_settimeofday); 388EXPORT_SYMBOL(do_settimeofday);
389 389
390
391/**
392 * timekeeping_inject_offset - Adds or subtracts from the current time.
393 * @tv: pointer to the timespec variable containing the offset
394 *
395 * Adds or subtracts an offset value from the current time.
396 */
397int timekeeping_inject_offset(struct timespec *ts)
398{
399 unsigned long flags;
400
401 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
402 return -EINVAL;
403
404 write_seqlock_irqsave(&xtime_lock, flags);
405
406 timekeeping_forward_now();
407
408 xtime = timespec_add(xtime, *ts);
409 wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
410
411 timekeeper.ntp_error = 0;
412 ntp_clear();
413
414 update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
415 timekeeper.mult);
416
417 write_sequnlock_irqrestore(&xtime_lock, flags);
418
419 /* signal hrtimers about time change */
420 clock_was_set();
421
422 return 0;
423}
424EXPORT_SYMBOL(timekeeping_inject_offset);
425
390/** 426/**
391 * change_clocksource - Swaps clocksources if a new one is available 427 * change_clocksource - Swaps clocksources if a new one is available
392 * 428 *
@@ -779,7 +815,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
779 * 815 *
780 * Called from the timer interrupt, must hold a write on xtime_lock. 816 * Called from the timer interrupt, must hold a write on xtime_lock.
781 */ 817 */
782void update_wall_time(void) 818static void update_wall_time(void)
783{ 819{
784 struct clocksource *clock; 820 struct clocksource *clock;
785 cycle_t offset; 821 cycle_t offset;
@@ -871,7 +907,7 @@ void update_wall_time(void)
871 * getboottime - Return the real time of system boot. 907 * getboottime - Return the real time of system boot.
872 * @ts: pointer to the timespec to be set 908 * @ts: pointer to the timespec to be set
873 * 909 *
874 * Returns the time of day in a timespec. 910 * Returns the wall-time of boot in a timespec.
875 * 911 *
876 * This is based on the wall_to_monotonic offset and the total suspend 912 * This is based on the wall_to_monotonic offset and the total suspend
877 * time. Calls to settimeofday will affect the value returned (which 913 * time. Calls to settimeofday will affect the value returned (which
@@ -889,6 +925,55 @@ void getboottime(struct timespec *ts)
889} 925}
890EXPORT_SYMBOL_GPL(getboottime); 926EXPORT_SYMBOL_GPL(getboottime);
891 927
928
929/**
930 * get_monotonic_boottime - Returns monotonic time since boot
931 * @ts: pointer to the timespec to be set
932 *
933 * Returns the monotonic time since boot in a timespec.
934 *
935 * This is similar to CLOCK_MONTONIC/ktime_get_ts, but also
936 * includes the time spent in suspend.
937 */
938void get_monotonic_boottime(struct timespec *ts)
939{
940 struct timespec tomono, sleep;
941 unsigned int seq;
942 s64 nsecs;
943
944 WARN_ON(timekeeping_suspended);
945
946 do {
947 seq = read_seqbegin(&xtime_lock);
948 *ts = xtime;
949 tomono = wall_to_monotonic;
950 sleep = total_sleep_time;
951 nsecs = timekeeping_get_ns();
952
953 } while (read_seqretry(&xtime_lock, seq));
954
955 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
956 ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
957}
958EXPORT_SYMBOL_GPL(get_monotonic_boottime);
959
960/**
961 * ktime_get_boottime - Returns monotonic time since boot in a ktime
962 *
963 * Returns the monotonic time since boot in a ktime
964 *
965 * This is similar to CLOCK_MONTONIC/ktime_get, but also
966 * includes the time spent in suspend.
967 */
968ktime_t ktime_get_boottime(void)
969{
970 struct timespec ts;
971
972 get_monotonic_boottime(&ts);
973 return timespec_to_ktime(ts);
974}
975EXPORT_SYMBOL_GPL(ktime_get_boottime);
976
892/** 977/**
893 * monotonic_to_bootbased - Convert the monotonic time to boot based. 978 * monotonic_to_bootbased - Convert the monotonic time to boot based.
894 * @ts: pointer to the timespec to be converted 979 * @ts: pointer to the timespec to be converted
@@ -910,11 +995,6 @@ struct timespec __current_kernel_time(void)
910 return xtime; 995 return xtime;
911} 996}
912 997
913struct timespec __get_wall_to_monotonic(void)
914{
915 return wall_to_monotonic;
916}
917
918struct timespec current_kernel_time(void) 998struct timespec current_kernel_time(void)
919{ 999{
920 struct timespec now; 1000 struct timespec now;
@@ -946,3 +1026,48 @@ struct timespec get_monotonic_coarse(void)
946 now.tv_nsec + mono.tv_nsec); 1026 now.tv_nsec + mono.tv_nsec);
947 return now; 1027 return now;
948} 1028}
1029
1030/*
1031 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1032 * without sampling the sequence number in xtime_lock.
1033 * jiffies is defined in the linker script...
1034 */
1035void do_timer(unsigned long ticks)
1036{
1037 jiffies_64 += ticks;
1038 update_wall_time();
1039 calc_global_load(ticks);
1040}
1041
1042/**
1043 * get_xtime_and_monotonic_and_sleep_offset() - get xtime, wall_to_monotonic,
1044 * and sleep offsets.
1045 * @xtim: pointer to timespec to be set with xtime
1046 * @wtom: pointer to timespec to be set with wall_to_monotonic
1047 * @sleep: pointer to timespec to be set with time in suspend
1048 */
1049void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1050 struct timespec *wtom, struct timespec *sleep)
1051{
1052 unsigned long seq;
1053
1054 do {
1055 seq = read_seqbegin(&xtime_lock);
1056 *xtim = xtime;
1057 *wtom = wall_to_monotonic;
1058 *sleep = total_sleep_time;
1059 } while (read_seqretry(&xtime_lock, seq));
1060}
1061
1062/**
1063 * xtime_update() - advances the timekeeping infrastructure
1064 * @ticks: number of ticks, that have elapsed since the last call.
1065 *
1066 * Must be called with interrupts disabled.
1067 */
1068void xtime_update(unsigned long ticks)
1069{
1070 write_seqlock(&xtime_lock);
1071 do_timer(ticks);
1072 write_sequnlock(&xtime_lock);
1073}
diff --git a/kernel/timer.c b/kernel/timer.c
index d6459923d245..fd6198692b57 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -404,6 +404,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {}
404 404
405static struct debug_obj_descr timer_debug_descr; 405static struct debug_obj_descr timer_debug_descr;
406 406
407static void *timer_debug_hint(void *addr)
408{
409 return ((struct timer_list *) addr)->function;
410}
411
407/* 412/*
408 * fixup_init is called when: 413 * fixup_init is called when:
409 * - an active object is initialized 414 * - an active object is initialized
@@ -477,6 +482,7 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
477 482
478static struct debug_obj_descr timer_debug_descr = { 483static struct debug_obj_descr timer_debug_descr = {
479 .name = "timer_list", 484 .name = "timer_list",
485 .debug_hint = timer_debug_hint,
480 .fixup_init = timer_fixup_init, 486 .fixup_init = timer_fixup_init,
481 .fixup_activate = timer_fixup_activate, 487 .fixup_activate = timer_fixup_activate,
482 .fixup_free = timer_fixup_free, 488 .fixup_free = timer_fixup_free,
@@ -964,6 +970,25 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
964 * add_timer_on(). Upon exit the timer is not queued and the handler is 970 * add_timer_on(). Upon exit the timer is not queued and the handler is
965 * not running on any CPU. 971 * not running on any CPU.
966 * 972 *
973 * Note: You must not hold locks that are held in interrupt context
974 * while calling this function. Even if the lock has nothing to do
975 * with the timer in question. Here's why:
976 *
977 * CPU0 CPU1
978 * ---- ----
979 * <SOFTIRQ>
980 * call_timer_fn();
981 * base->running_timer = mytimer;
982 * spin_lock_irq(somelock);
983 * <IRQ>
984 * spin_lock(somelock);
985 * del_timer_sync(mytimer);
986 * while (base->running_timer == mytimer);
987 *
988 * Now del_timer_sync() will never return and never release somelock.
989 * The interrupt on the other CPU is waiting to grab somelock but
990 * it has interrupted the softirq that CPU0 is waiting to finish.
991 *
967 * The function returns whether it has deactivated a pending timer or not. 992 * The function returns whether it has deactivated a pending timer or not.
968 */ 993 */
969int del_timer_sync(struct timer_list *timer) 994int del_timer_sync(struct timer_list *timer)
@@ -971,6 +996,10 @@ int del_timer_sync(struct timer_list *timer)
971#ifdef CONFIG_LOCKDEP 996#ifdef CONFIG_LOCKDEP
972 unsigned long flags; 997 unsigned long flags;
973 998
999 /*
1000 * If lockdep gives a backtrace here, please reference
1001 * the synchronization rules above.
1002 */
974 local_irq_save(flags); 1003 local_irq_save(flags);
975 lock_map_acquire(&timer->lockdep_map); 1004 lock_map_acquire(&timer->lockdep_map);
976 lock_map_release(&timer->lockdep_map); 1005 lock_map_release(&timer->lockdep_map);
@@ -1295,19 +1324,6 @@ void run_local_timers(void)
1295 raise_softirq(TIMER_SOFTIRQ); 1324 raise_softirq(TIMER_SOFTIRQ);
1296} 1325}
1297 1326
1298/*
1299 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1300 * without sampling the sequence number in xtime_lock.
1301 * jiffies is defined in the linker script...
1302 */
1303
1304void do_timer(unsigned long ticks)
1305{
1306 jiffies_64 += ticks;
1307 update_wall_time();
1308 calc_global_load(ticks);
1309}
1310
1311#ifdef __ARCH_WANT_SYS_ALARM 1327#ifdef __ARCH_WANT_SYS_ALARM
1312 1328
1313/* 1329/*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d95721f33702..cbafed7d4f38 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1827,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1827 rwbs[i] = '\0'; 1827 rwbs[i] = '\0';
1828} 1828}
1829 1829
1830void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
1831{
1832 int rw = rq->cmd_flags & 0x03;
1833 int bytes;
1834
1835 if (rq->cmd_flags & REQ_DISCARD)
1836 rw |= REQ_DISCARD;
1837
1838 if (rq->cmd_flags & REQ_SECURE)
1839 rw |= REQ_SECURE;
1840
1841 bytes = blk_rq_bytes(rq);
1842
1843 blk_fill_rwbs(rwbs, rw, bytes);
1844}
1845
1846#endif /* CONFIG_EVENT_TRACING */ 1830#endif /* CONFIG_EVENT_TRACING */
1847 1831
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3dadae83883..888b611897d3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3328,7 +3328,7 @@ static int start_graph_tracing(void)
3328 /* The cpu_boot init_task->ret_stack will never be freed */ 3328 /* The cpu_boot init_task->ret_stack will never be freed */
3329 for_each_online_cpu(cpu) { 3329 for_each_online_cpu(cpu) {
3330 if (!idle_task(cpu)->ret_stack) 3330 if (!idle_task(cpu)->ret_stack)
3331 ftrace_graph_init_task(idle_task(cpu)); 3331 ftrace_graph_init_idle_task(idle_task(cpu), cpu);
3332 } 3332 }
3333 3333
3334 do { 3334 do {
@@ -3418,6 +3418,49 @@ void unregister_ftrace_graph(void)
3418 mutex_unlock(&ftrace_lock); 3418 mutex_unlock(&ftrace_lock);
3419} 3419}
3420 3420
3421static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
3422
3423static void
3424graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
3425{
3426 atomic_set(&t->tracing_graph_pause, 0);
3427 atomic_set(&t->trace_overrun, 0);
3428 t->ftrace_timestamp = 0;
3429 /* make curr_ret_stack visable before we add the ret_stack */
3430 smp_wmb();
3431 t->ret_stack = ret_stack;
3432}
3433
3434/*
3435 * Allocate a return stack for the idle task. May be the first
3436 * time through, or it may be done by CPU hotplug online.
3437 */
3438void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
3439{
3440 t->curr_ret_stack = -1;
3441 /*
3442 * The idle task has no parent, it either has its own
3443 * stack or no stack at all.
3444 */
3445 if (t->ret_stack)
3446 WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
3447
3448 if (ftrace_graph_active) {
3449 struct ftrace_ret_stack *ret_stack;
3450
3451 ret_stack = per_cpu(idle_ret_stack, cpu);
3452 if (!ret_stack) {
3453 ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
3454 * sizeof(struct ftrace_ret_stack),
3455 GFP_KERNEL);
3456 if (!ret_stack)
3457 return;
3458 per_cpu(idle_ret_stack, cpu) = ret_stack;
3459 }
3460 graph_init_task(t, ret_stack);
3461 }
3462}
3463
3421/* Allocate a return stack for newly created task */ 3464/* Allocate a return stack for newly created task */
3422void ftrace_graph_init_task(struct task_struct *t) 3465void ftrace_graph_init_task(struct task_struct *t)
3423{ 3466{
@@ -3433,12 +3476,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3433 GFP_KERNEL); 3476 GFP_KERNEL);
3434 if (!ret_stack) 3477 if (!ret_stack)
3435 return; 3478 return;
3436 atomic_set(&t->tracing_graph_pause, 0); 3479 graph_init_task(t, ret_stack);
3437 atomic_set(&t->trace_overrun, 0);
3438 t->ftrace_timestamp = 0;
3439 /* make curr_ret_stack visable before we add the ret_stack */
3440 smp_wmb();
3441 t->ret_stack = ret_stack;
3442 } 3480 }
3443} 3481}
3444 3482
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd1c35a4fbcc..db7b439d23ee 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5,7 +5,6 @@
5 */ 5 */
6#include <linux/ring_buffer.h> 6#include <linux/ring_buffer.h>
7#include <linux/trace_clock.h> 7#include <linux/trace_clock.h>
8#include <linux/ftrace_irq.h>
9#include <linux/spinlock.h> 8#include <linux/spinlock.h>
10#include <linux/debugfs.h> 9#include <linux/debugfs.h>
11#include <linux/uaccess.h> 10#include <linux/uaccess.h>
@@ -1429,6 +1428,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1429} 1428}
1430EXPORT_SYMBOL_GPL(ring_buffer_resize); 1429EXPORT_SYMBOL_GPL(ring_buffer_resize);
1431 1430
1431void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1432{
1433 mutex_lock(&buffer->mutex);
1434 if (val)
1435 buffer->flags |= RB_FL_OVERWRITE;
1436 else
1437 buffer->flags &= ~RB_FL_OVERWRITE;
1438 mutex_unlock(&buffer->mutex);
1439}
1440EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1441
1432static inline void * 1442static inline void *
1433__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) 1443__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1434{ 1444{
@@ -2162,11 +2172,19 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2162 if (likely(ts >= cpu_buffer->write_stamp)) { 2172 if (likely(ts >= cpu_buffer->write_stamp)) {
2163 delta = diff; 2173 delta = diff;
2164 if (unlikely(test_time_stamp(delta))) { 2174 if (unlikely(test_time_stamp(delta))) {
2175 int local_clock_stable = 1;
2176#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2177 local_clock_stable = sched_clock_stable;
2178#endif
2165 WARN_ONCE(delta > (1ULL << 59), 2179 WARN_ONCE(delta > (1ULL << 59),
2166 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", 2180 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
2167 (unsigned long long)delta, 2181 (unsigned long long)delta,
2168 (unsigned long long)ts, 2182 (unsigned long long)ts,
2169 (unsigned long long)cpu_buffer->write_stamp); 2183 (unsigned long long)cpu_buffer->write_stamp,
2184 local_clock_stable ? "" :
2185 "If you just came from a suspend/resume,\n"
2186 "please switch to the trace global clock:\n"
2187 " echo global > /sys/kernel/debug/tracing/trace_clock\n");
2170 add_timestamp = 1; 2188 add_timestamp = 1;
2171 } 2189 }
2172 } 2190 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dc53ecb80589..9541c27c1cf2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -41,8 +41,6 @@
41#include "trace.h" 41#include "trace.h"
42#include "trace_output.h" 42#include "trace_output.h"
43 43
44#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
45
46/* 44/*
47 * On boot up, the ring buffer is set to the minimum size, so that 45 * On boot up, the ring buffer is set to the minimum size, so that
48 * we do not waste memory on systems that are not using tracing. 46 * we do not waste memory on systems that are not using tracing.
@@ -340,7 +338,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
340/* trace_flags holds trace_options default values */ 338/* trace_flags holds trace_options default values */
341unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 339unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
342 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 340 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
343 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD; 341 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
344 342
345static int trace_stop_count; 343static int trace_stop_count;
346static DEFINE_SPINLOCK(tracing_start_lock); 344static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +423,7 @@ static const char *trace_options[] = {
425 "sleep-time", 423 "sleep-time",
426 "graph-time", 424 "graph-time",
427 "record-cmd", 425 "record-cmd",
426 "overwrite",
428 NULL 427 NULL
429}; 428};
430 429
@@ -780,6 +779,11 @@ __acquires(kernel_lock)
780 tracing_reset_online_cpus(tr); 779 tracing_reset_online_cpus(tr);
781 780
782 current_trace = type; 781 current_trace = type;
782
783 /* If we expanded the buffers, make sure the max is expanded too */
784 if (ring_buffer_expanded && type->use_max_tr)
785 ring_buffer_resize(max_tr.buffer, trace_buf_size);
786
783 /* the test is responsible for initializing and enabling */ 787 /* the test is responsible for initializing and enabling */
784 pr_info("Testing tracer %s: ", type->name); 788 pr_info("Testing tracer %s: ", type->name);
785 ret = type->selftest(type, tr); 789 ret = type->selftest(type, tr);
@@ -792,6 +796,10 @@ __acquires(kernel_lock)
792 /* Only reset on passing, to avoid touching corrupted buffers */ 796 /* Only reset on passing, to avoid touching corrupted buffers */
793 tracing_reset_online_cpus(tr); 797 tracing_reset_online_cpus(tr);
794 798
799 /* Shrink the max buffer again */
800 if (ring_buffer_expanded && type->use_max_tr)
801 ring_buffer_resize(max_tr.buffer, 1);
802
795 printk(KERN_CONT "PASSED\n"); 803 printk(KERN_CONT "PASSED\n");
796 } 804 }
797#endif 805#endif
@@ -1102,7 +1110,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1102 1110
1103 entry->preempt_count = pc & 0xff; 1111 entry->preempt_count = pc & 0xff;
1104 entry->pid = (tsk) ? tsk->pid : 0; 1112 entry->pid = (tsk) ? tsk->pid : 0;
1105 entry->lock_depth = (tsk) ? tsk->lock_depth : 0;
1106 entry->flags = 1113 entry->flags =
1107#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 1114#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1108 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 1115 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1749,10 +1756,9 @@ static void print_lat_help_header(struct seq_file *m)
1749 seq_puts(m, "# | / _----=> need-resched \n"); 1756 seq_puts(m, "# | / _----=> need-resched \n");
1750 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1757 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1751 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1758 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1752 seq_puts(m, "# |||| /_--=> lock-depth \n"); 1759 seq_puts(m, "# |||| / delay \n");
1753 seq_puts(m, "# |||||/ delay \n"); 1760 seq_puts(m, "# cmd pid ||||| time | caller \n");
1754 seq_puts(m, "# cmd pid |||||| time | caller \n"); 1761 seq_puts(m, "# \\ / ||||| \\ | / \n");
1755 seq_puts(m, "# \\ / |||||| \\ | / \n");
1756} 1762}
1757 1763
1758static void print_func_help_header(struct seq_file *m) 1764static void print_func_help_header(struct seq_file *m)
@@ -2529,6 +2535,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2529 2535
2530 if (mask == TRACE_ITER_RECORD_CMD) 2536 if (mask == TRACE_ITER_RECORD_CMD)
2531 trace_event_enable_cmd_record(enabled); 2537 trace_event_enable_cmd_record(enabled);
2538
2539 if (mask == TRACE_ITER_OVERWRITE)
2540 ring_buffer_change_overwrite(global_trace.buffer, enabled);
2532} 2541}
2533 2542
2534static ssize_t 2543static ssize_t
@@ -2710,6 +2719,10 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2710 2719
2711 mutex_lock(&trace_types_lock); 2720 mutex_lock(&trace_types_lock);
2712 if (tracer_enabled ^ val) { 2721 if (tracer_enabled ^ val) {
2722
2723 /* Only need to warn if this is used to change the state */
2724 WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
2725
2713 if (val) { 2726 if (val) {
2714 tracer_enabled = 1; 2727 tracer_enabled = 1;
2715 if (current_trace->start) 2728 if (current_trace->start)
@@ -4551,9 +4564,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
4551__init static int tracer_alloc_buffers(void) 4564__init static int tracer_alloc_buffers(void)
4552{ 4565{
4553 int ring_buf_size; 4566 int ring_buf_size;
4567 enum ring_buffer_flags rb_flags;
4554 int i; 4568 int i;
4555 int ret = -ENOMEM; 4569 int ret = -ENOMEM;
4556 4570
4571
4557 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 4572 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
4558 goto out; 4573 goto out;
4559 4574
@@ -4566,12 +4581,13 @@ __init static int tracer_alloc_buffers(void)
4566 else 4581 else
4567 ring_buf_size = 1; 4582 ring_buf_size = 1;
4568 4583
4584 rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
4585
4569 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 4586 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
4570 cpumask_copy(tracing_cpumask, cpu_all_mask); 4587 cpumask_copy(tracing_cpumask, cpu_all_mask);
4571 4588
4572 /* TODO: make the number of buffers hot pluggable with CPUS */ 4589 /* TODO: make the number of buffers hot pluggable with CPUS */
4573 global_trace.buffer = ring_buffer_alloc(ring_buf_size, 4590 global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
4574 TRACE_BUFFER_FLAGS);
4575 if (!global_trace.buffer) { 4591 if (!global_trace.buffer) {
4576 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); 4592 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
4577 WARN_ON(1); 4593 WARN_ON(1);
@@ -4581,7 +4597,7 @@ __init static int tracer_alloc_buffers(void)
4581 4597
4582 4598
4583#ifdef CONFIG_TRACER_MAX_TRACE 4599#ifdef CONFIG_TRACER_MAX_TRACE
4584 max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS); 4600 max_tr.buffer = ring_buffer_alloc(1, rb_flags);
4585 if (!max_tr.buffer) { 4601 if (!max_tr.buffer) {
4586 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); 4602 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4587 WARN_ON(1); 4603 WARN_ON(1);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9021f8c0c0c3..5e9dfc6286dd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,8 +272,8 @@ struct tracer {
272 /* If you handled the flag setting, return 0 */ 272 /* If you handled the flag setting, return 0 */
273 int (*set_flag)(u32 old_flags, u32 bit, int set); 273 int (*set_flag)(u32 old_flags, u32 bit, int set);
274 struct tracer *next; 274 struct tracer *next;
275 int print_max;
276 struct tracer_flags *flags; 275 struct tracer_flags *flags;
276 int print_max;
277 int use_max_tr; 277 int use_max_tr;
278}; 278};
279 279
@@ -606,6 +606,7 @@ enum trace_iterator_flags {
606 TRACE_ITER_SLEEP_TIME = 0x40000, 606 TRACE_ITER_SLEEP_TIME = 0x40000,
607 TRACE_ITER_GRAPH_TIME = 0x80000, 607 TRACE_ITER_GRAPH_TIME = 0x80000,
608 TRACE_ITER_RECORD_CMD = 0x100000, 608 TRACE_ITER_RECORD_CMD = 0x100000,
609 TRACE_ITER_OVERWRITE = 0x200000,
609}; 610};
610 611
611/* 612/*
@@ -661,8 +662,10 @@ struct ftrace_event_field {
661}; 662};
662 663
663struct event_filter { 664struct event_filter {
664 int n_preds; 665 int n_preds; /* Number assigned */
665 struct filter_pred **preds; 666 int a_preds; /* allocated */
667 struct filter_pred *preds;
668 struct filter_pred *root;
666 char *filter_string; 669 char *filter_string;
667}; 670};
668 671
@@ -674,11 +677,23 @@ struct event_subsystem {
674 int nr_events; 677 int nr_events;
675}; 678};
676 679
680#define FILTER_PRED_INVALID ((unsigned short)-1)
681#define FILTER_PRED_IS_RIGHT (1 << 15)
682#define FILTER_PRED_FOLD (1 << 15)
683
684/*
685 * The max preds is the size of unsigned short with
686 * two flags at the MSBs. One bit is used for both the IS_RIGHT
687 * and FOLD flags. The other is reserved.
688 *
689 * 2^14 preds is way more than enough.
690 */
691#define MAX_FILTER_PRED 16384
692
677struct filter_pred; 693struct filter_pred;
678struct regex; 694struct regex;
679 695
680typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, 696typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
681 int val1, int val2);
682 697
683typedef int (*regex_match_func)(char *str, struct regex *r, int len); 698typedef int (*regex_match_func)(char *str, struct regex *r, int len);
684 699
@@ -700,11 +715,23 @@ struct filter_pred {
700 filter_pred_fn_t fn; 715 filter_pred_fn_t fn;
701 u64 val; 716 u64 val;
702 struct regex regex; 717 struct regex regex;
703 char *field_name; 718 /*
719 * Leaf nodes use field_name, ops is used by AND and OR
720 * nodes. The field_name is always freed when freeing a pred.
721 * We can overload field_name for ops and have it freed
722 * as well.
723 */
724 union {
725 char *field_name;
726 unsigned short *ops;
727 };
704 int offset; 728 int offset;
705 int not; 729 int not;
706 int op; 730 int op;
707 int pop_n; 731 unsigned short index;
732 unsigned short parent;
733 unsigned short left;
734 unsigned short right;
708}; 735};
709 736
710extern struct list_head ftrace_common_fields; 737extern struct list_head ftrace_common_fields;
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 6cf223764be8..1516cb3ec549 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -109,12 +109,12 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
109 */ 109 */
110#define FTRACE_CTX_FIELDS \ 110#define FTRACE_CTX_FIELDS \
111 __field( unsigned int, prev_pid ) \ 111 __field( unsigned int, prev_pid ) \
112 __field( unsigned int, next_pid ) \
113 __field( unsigned int, next_cpu ) \
112 __field( unsigned char, prev_prio ) \ 114 __field( unsigned char, prev_prio ) \
113 __field( unsigned char, prev_state ) \ 115 __field( unsigned char, prev_state ) \
114 __field( unsigned int, next_pid ) \
115 __field( unsigned char, next_prio ) \ 116 __field( unsigned char, next_prio ) \
116 __field( unsigned char, next_state ) \ 117 __field( unsigned char, next_state )
117 __field( unsigned int, next_cpu )
118 118
119FTRACE_ENTRY(context_switch, ctx_switch_entry, 119FTRACE_ENTRY(context_switch, ctx_switch_entry,
120 120
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5f499e0438a4..e88f74fe1d4c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
116 __common_field(unsigned char, flags); 116 __common_field(unsigned char, flags);
117 __common_field(unsigned char, preempt_count); 117 __common_field(unsigned char, preempt_count);
118 __common_field(int, pid); 118 __common_field(int, pid);
119 __common_field(int, lock_depth);
120 119
121 return ret; 120 return ret;
122} 121}
@@ -326,6 +325,7 @@ int trace_set_clr_event(const char *system, const char *event, int set)
326{ 325{
327 return __ftrace_set_clr_event(NULL, system, event, set); 326 return __ftrace_set_clr_event(NULL, system, event, set);
328} 327}
328EXPORT_SYMBOL_GPL(trace_set_clr_event);
329 329
330/* 128 should be much more than enough */ 330/* 128 should be much more than enough */
331#define EVENT_BUF_SIZE 127 331#define EVENT_BUF_SIZE 127
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 36d40104b17f..3249b4f77ef0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -123,9 +123,13 @@ struct filter_parse_state {
123 } operand; 123 } operand;
124}; 124};
125 125
126struct pred_stack {
127 struct filter_pred **preds;
128 int index;
129};
130
126#define DEFINE_COMPARISON_PRED(type) \ 131#define DEFINE_COMPARISON_PRED(type) \
127static int filter_pred_##type(struct filter_pred *pred, void *event, \ 132static int filter_pred_##type(struct filter_pred *pred, void *event) \
128 int val1, int val2) \
129{ \ 133{ \
130 type *addr = (type *)(event + pred->offset); \ 134 type *addr = (type *)(event + pred->offset); \
131 type val = (type)pred->val; \ 135 type val = (type)pred->val; \
@@ -152,8 +156,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event, \
152} 156}
153 157
154#define DEFINE_EQUALITY_PRED(size) \ 158#define DEFINE_EQUALITY_PRED(size) \
155static int filter_pred_##size(struct filter_pred *pred, void *event, \ 159static int filter_pred_##size(struct filter_pred *pred, void *event) \
156 int val1, int val2) \
157{ \ 160{ \
158 u##size *addr = (u##size *)(event + pred->offset); \ 161 u##size *addr = (u##size *)(event + pred->offset); \
159 u##size val = (u##size)pred->val; \ 162 u##size val = (u##size)pred->val; \
@@ -178,23 +181,8 @@ DEFINE_EQUALITY_PRED(32);
178DEFINE_EQUALITY_PRED(16); 181DEFINE_EQUALITY_PRED(16);
179DEFINE_EQUALITY_PRED(8); 182DEFINE_EQUALITY_PRED(8);
180 183
181static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
182 void *event __attribute((unused)),
183 int val1, int val2)
184{
185 return val1 && val2;
186}
187
188static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
189 void *event __attribute((unused)),
190 int val1, int val2)
191{
192 return val1 || val2;
193}
194
195/* Filter predicate for fixed sized arrays of characters */ 184/* Filter predicate for fixed sized arrays of characters */
196static int filter_pred_string(struct filter_pred *pred, void *event, 185static int filter_pred_string(struct filter_pred *pred, void *event)
197 int val1, int val2)
198{ 186{
199 char *addr = (char *)(event + pred->offset); 187 char *addr = (char *)(event + pred->offset);
200 int cmp, match; 188 int cmp, match;
@@ -207,8 +195,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
207} 195}
208 196
209/* Filter predicate for char * pointers */ 197/* Filter predicate for char * pointers */
210static int filter_pred_pchar(struct filter_pred *pred, void *event, 198static int filter_pred_pchar(struct filter_pred *pred, void *event)
211 int val1, int val2)
212{ 199{
213 char **addr = (char **)(event + pred->offset); 200 char **addr = (char **)(event + pred->offset);
214 int cmp, match; 201 int cmp, match;
@@ -231,8 +218,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
231 * and add it to the address of the entry, and at last we have 218 * and add it to the address of the entry, and at last we have
232 * the address of the string. 219 * the address of the string.
233 */ 220 */
234static int filter_pred_strloc(struct filter_pred *pred, void *event, 221static int filter_pred_strloc(struct filter_pred *pred, void *event)
235 int val1, int val2)
236{ 222{
237 u32 str_item = *(u32 *)(event + pred->offset); 223 u32 str_item = *(u32 *)(event + pred->offset);
238 int str_loc = str_item & 0xffff; 224 int str_loc = str_item & 0xffff;
@@ -247,8 +233,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
247 return match; 233 return match;
248} 234}
249 235
250static int filter_pred_none(struct filter_pred *pred, void *event, 236static int filter_pred_none(struct filter_pred *pred, void *event)
251 int val1, int val2)
252{ 237{
253 return 0; 238 return 0;
254} 239}
@@ -377,32 +362,147 @@ static void filter_build_regex(struct filter_pred *pred)
377 pred->not ^= not; 362 pred->not ^= not;
378} 363}
379 364
365enum move_type {
366 MOVE_DOWN,
367 MOVE_UP_FROM_LEFT,
368 MOVE_UP_FROM_RIGHT
369};
370
371static struct filter_pred *
372get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
373 int index, enum move_type *move)
374{
375 if (pred->parent & FILTER_PRED_IS_RIGHT)
376 *move = MOVE_UP_FROM_RIGHT;
377 else
378 *move = MOVE_UP_FROM_LEFT;
379 pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
380
381 return pred;
382}
383
384/*
385 * A series of AND or ORs where found together. Instead of
386 * climbing up and down the tree branches, an array of the
387 * ops were made in order of checks. We can just move across
388 * the array and short circuit if needed.
389 */
390static int process_ops(struct filter_pred *preds,
391 struct filter_pred *op, void *rec)
392{
393 struct filter_pred *pred;
394 int type;
395 int match;
396 int i;
397
398 /*
399 * Micro-optimization: We set type to true if op
400 * is an OR and false otherwise (AND). Then we
401 * just need to test if the match is equal to
402 * the type, and if it is, we can short circuit the
403 * rest of the checks:
404 *
405 * if ((match && op->op == OP_OR) ||
406 * (!match && op->op == OP_AND))
407 * return match;
408 */
409 type = op->op == OP_OR;
410
411 for (i = 0; i < op->val; i++) {
412 pred = &preds[op->ops[i]];
413 match = pred->fn(pred, rec);
414 if (!!match == type)
415 return match;
416 }
417 return match;
418}
419
380/* return 1 if event matches, 0 otherwise (discard) */ 420/* return 1 if event matches, 0 otherwise (discard) */
381int filter_match_preds(struct event_filter *filter, void *rec) 421int filter_match_preds(struct event_filter *filter, void *rec)
382{ 422{
383 int match, top = 0, val1 = 0, val2 = 0; 423 int match = -1;
384 int stack[MAX_FILTER_PRED]; 424 enum move_type move = MOVE_DOWN;
425 struct filter_pred *preds;
385 struct filter_pred *pred; 426 struct filter_pred *pred;
386 int i; 427 struct filter_pred *root;
428 int n_preds;
429 int done = 0;
430
431 /* no filter is considered a match */
432 if (!filter)
433 return 1;
434
435 n_preds = filter->n_preds;
436
437 if (!n_preds)
438 return 1;
439
440 /*
441 * n_preds, root and filter->preds are protect with preemption disabled.
442 */
443 preds = rcu_dereference_sched(filter->preds);
444 root = rcu_dereference_sched(filter->root);
445 if (!root)
446 return 1;
447
448 pred = root;
387 449
388 for (i = 0; i < filter->n_preds; i++) { 450 /* match is currently meaningless */
389 pred = filter->preds[i]; 451 match = -1;
390 if (!pred->pop_n) { 452
391 match = pred->fn(pred, rec, val1, val2); 453 do {
392 stack[top++] = match; 454 switch (move) {
455 case MOVE_DOWN:
456 /* only AND and OR have children */
457 if (pred->left != FILTER_PRED_INVALID) {
458 /* If ops is set, then it was folded. */
459 if (!pred->ops) {
460 /* keep going to down the left side */
461 pred = &preds[pred->left];
462 continue;
463 }
464 /* We can treat folded ops as a leaf node */
465 match = process_ops(preds, pred, rec);
466 } else
467 match = pred->fn(pred, rec);
468 /* If this pred is the only pred */
469 if (pred == root)
470 break;
471 pred = get_pred_parent(pred, preds,
472 pred->parent, &move);
473 continue;
474 case MOVE_UP_FROM_LEFT:
475 /*
476 * Check for short circuits.
477 *
478 * Optimization: !!match == (pred->op == OP_OR)
479 * is the same as:
480 * if ((match && pred->op == OP_OR) ||
481 * (!match && pred->op == OP_AND))
482 */
483 if (!!match == (pred->op == OP_OR)) {
484 if (pred == root)
485 break;
486 pred = get_pred_parent(pred, preds,
487 pred->parent, &move);
488 continue;
489 }
490 /* now go down the right side of the tree. */
491 pred = &preds[pred->right];
492 move = MOVE_DOWN;
493 continue;
494 case MOVE_UP_FROM_RIGHT:
495 /* We finished this equation. */
496 if (pred == root)
497 break;
498 pred = get_pred_parent(pred, preds,
499 pred->parent, &move);
393 continue; 500 continue;
394 } 501 }
395 if (pred->pop_n > top) { 502 done = 1;
396 WARN_ON_ONCE(1); 503 } while (!done);
397 return 0;
398 }
399 val1 = stack[--top];
400 val2 = stack[--top];
401 match = pred->fn(pred, rec, val1, val2);
402 stack[top++] = match;
403 }
404 504
405 return stack[--top]; 505 return match;
406} 506}
407EXPORT_SYMBOL_GPL(filter_match_preds); 507EXPORT_SYMBOL_GPL(filter_match_preds);
408 508
@@ -414,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos)
414 514
415static void remove_filter_string(struct event_filter *filter) 515static void remove_filter_string(struct event_filter *filter)
416{ 516{
517 if (!filter)
518 return;
519
417 kfree(filter->filter_string); 520 kfree(filter->filter_string);
418 filter->filter_string = NULL; 521 filter->filter_string = NULL;
419} 522}
@@ -473,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps,
473 576
474void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) 577void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
475{ 578{
476 struct event_filter *filter = call->filter; 579 struct event_filter *filter;
477 580
478 mutex_lock(&event_mutex); 581 mutex_lock(&event_mutex);
582 filter = call->filter;
479 if (filter && filter->filter_string) 583 if (filter && filter->filter_string)
480 trace_seq_printf(s, "%s\n", filter->filter_string); 584 trace_seq_printf(s, "%s\n", filter->filter_string);
481 else 585 else
@@ -486,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
486void print_subsystem_event_filter(struct event_subsystem *system, 590void print_subsystem_event_filter(struct event_subsystem *system,
487 struct trace_seq *s) 591 struct trace_seq *s)
488{ 592{
489 struct event_filter *filter = system->filter; 593 struct event_filter *filter;
490 594
491 mutex_lock(&event_mutex); 595 mutex_lock(&event_mutex);
596 filter = system->filter;
492 if (filter && filter->filter_string) 597 if (filter && filter->filter_string)
493 trace_seq_printf(s, "%s\n", filter->filter_string); 598 trace_seq_printf(s, "%s\n", filter->filter_string);
494 else 599 else
@@ -539,10 +644,58 @@ static void filter_clear_pred(struct filter_pred *pred)
539 pred->regex.len = 0; 644 pred->regex.len = 0;
540} 645}
541 646
542static int filter_set_pred(struct filter_pred *dest, 647static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
648{
649 stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
650 if (!stack->preds)
651 return -ENOMEM;
652 stack->index = n_preds;
653 return 0;
654}
655
656static void __free_pred_stack(struct pred_stack *stack)
657{
658 kfree(stack->preds);
659 stack->index = 0;
660}
661
662static int __push_pred_stack(struct pred_stack *stack,
663 struct filter_pred *pred)
664{
665 int index = stack->index;
666
667 if (WARN_ON(index == 0))
668 return -ENOSPC;
669
670 stack->preds[--index] = pred;
671 stack->index = index;
672 return 0;
673}
674
675static struct filter_pred *
676__pop_pred_stack(struct pred_stack *stack)
677{
678 struct filter_pred *pred;
679 int index = stack->index;
680
681 pred = stack->preds[index++];
682 if (!pred)
683 return NULL;
684
685 stack->index = index;
686 return pred;
687}
688
689static int filter_set_pred(struct event_filter *filter,
690 int idx,
691 struct pred_stack *stack,
543 struct filter_pred *src, 692 struct filter_pred *src,
544 filter_pred_fn_t fn) 693 filter_pred_fn_t fn)
545{ 694{
695 struct filter_pred *dest = &filter->preds[idx];
696 struct filter_pred *left;
697 struct filter_pred *right;
698
546 *dest = *src; 699 *dest = *src;
547 if (src->field_name) { 700 if (src->field_name) {
548 dest->field_name = kstrdup(src->field_name, GFP_KERNEL); 701 dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,116 +703,140 @@ static int filter_set_pred(struct filter_pred *dest,
550 return -ENOMEM; 703 return -ENOMEM;
551 } 704 }
552 dest->fn = fn; 705 dest->fn = fn;
706 dest->index = idx;
553 707
554 return 0; 708 if (dest->op == OP_OR || dest->op == OP_AND) {
709 right = __pop_pred_stack(stack);
710 left = __pop_pred_stack(stack);
711 if (!left || !right)
712 return -EINVAL;
713 /*
714 * If both children can be folded
715 * and they are the same op as this op or a leaf,
716 * then this op can be folded.
717 */
718 if (left->index & FILTER_PRED_FOLD &&
719 (left->op == dest->op ||
720 left->left == FILTER_PRED_INVALID) &&
721 right->index & FILTER_PRED_FOLD &&
722 (right->op == dest->op ||
723 right->left == FILTER_PRED_INVALID))
724 dest->index |= FILTER_PRED_FOLD;
725
726 dest->left = left->index & ~FILTER_PRED_FOLD;
727 dest->right = right->index & ~FILTER_PRED_FOLD;
728 left->parent = dest->index & ~FILTER_PRED_FOLD;
729 right->parent = dest->index | FILTER_PRED_IS_RIGHT;
730 } else {
731 /*
732 * Make dest->left invalid to be used as a quick
733 * way to know this is a leaf node.
734 */
735 dest->left = FILTER_PRED_INVALID;
736
737 /* All leafs allow folding the parent ops. */
738 dest->index |= FILTER_PRED_FOLD;
739 }
740
741 return __push_pred_stack(stack, dest);
555} 742}
556 743
557static void filter_disable_preds(struct ftrace_event_call *call) 744static void __free_preds(struct event_filter *filter)
558{ 745{
559 struct event_filter *filter = call->filter;
560 int i; 746 int i;
561 747
562 call->flags &= ~TRACE_EVENT_FL_FILTERED; 748 if (filter->preds) {
749 for (i = 0; i < filter->a_preds; i++)
750 kfree(filter->preds[i].field_name);
751 kfree(filter->preds);
752 filter->preds = NULL;
753 }
754 filter->a_preds = 0;
563 filter->n_preds = 0; 755 filter->n_preds = 0;
564
565 for (i = 0; i < MAX_FILTER_PRED; i++)
566 filter->preds[i]->fn = filter_pred_none;
567} 756}
568 757
569static void __free_preds(struct event_filter *filter) 758static void filter_disable(struct ftrace_event_call *call)
570{ 759{
571 int i; 760 call->flags &= ~TRACE_EVENT_FL_FILTERED;
761}
572 762
763static void __free_filter(struct event_filter *filter)
764{
573 if (!filter) 765 if (!filter)
574 return; 766 return;
575 767
576 for (i = 0; i < MAX_FILTER_PRED; i++) { 768 __free_preds(filter);
577 if (filter->preds[i])
578 filter_free_pred(filter->preds[i]);
579 }
580 kfree(filter->preds);
581 kfree(filter->filter_string); 769 kfree(filter->filter_string);
582 kfree(filter); 770 kfree(filter);
583} 771}
584 772
773/*
774 * Called when destroying the ftrace_event_call.
775 * The call is being freed, so we do not need to worry about
776 * the call being currently used. This is for module code removing
777 * the tracepoints from within it.
778 */
585void destroy_preds(struct ftrace_event_call *call) 779void destroy_preds(struct ftrace_event_call *call)
586{ 780{
587 __free_preds(call->filter); 781 __free_filter(call->filter);
588 call->filter = NULL; 782 call->filter = NULL;
589 call->flags &= ~TRACE_EVENT_FL_FILTERED;
590} 783}
591 784
592static struct event_filter *__alloc_preds(void) 785static struct event_filter *__alloc_filter(void)
593{ 786{
594 struct event_filter *filter; 787 struct event_filter *filter;
788
789 filter = kzalloc(sizeof(*filter), GFP_KERNEL);
790 return filter;
791}
792
793static int __alloc_preds(struct event_filter *filter, int n_preds)
794{
595 struct filter_pred *pred; 795 struct filter_pred *pred;
596 int i; 796 int i;
597 797
598 filter = kzalloc(sizeof(*filter), GFP_KERNEL); 798 if (filter->preds)
599 if (!filter) 799 __free_preds(filter);
600 return ERR_PTR(-ENOMEM);
601 800
602 filter->n_preds = 0; 801 filter->preds =
802 kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
603 803
604 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
605 if (!filter->preds) 804 if (!filter->preds)
606 goto oom; 805 return -ENOMEM;
607 806
608 for (i = 0; i < MAX_FILTER_PRED; i++) { 807 filter->a_preds = n_preds;
609 pred = kzalloc(sizeof(*pred), GFP_KERNEL); 808 filter->n_preds = 0;
610 if (!pred) 809
611 goto oom; 810 for (i = 0; i < n_preds; i++) {
811 pred = &filter->preds[i];
612 pred->fn = filter_pred_none; 812 pred->fn = filter_pred_none;
613 filter->preds[i] = pred;
614 } 813 }
615 814
616 return filter;
617
618oom:
619 __free_preds(filter);
620 return ERR_PTR(-ENOMEM);
621}
622
623static int init_preds(struct ftrace_event_call *call)
624{
625 if (call->filter)
626 return 0;
627
628 call->flags &= ~TRACE_EVENT_FL_FILTERED;
629 call->filter = __alloc_preds();
630 if (IS_ERR(call->filter))
631 return PTR_ERR(call->filter);
632
633 return 0; 815 return 0;
634} 816}
635 817
636static int init_subsystem_preds(struct event_subsystem *system) 818static void filter_free_subsystem_preds(struct event_subsystem *system)
637{ 819{
638 struct ftrace_event_call *call; 820 struct ftrace_event_call *call;
639 int err;
640 821
641 list_for_each_entry(call, &ftrace_events, list) { 822 list_for_each_entry(call, &ftrace_events, list) {
642 if (strcmp(call->class->system, system->name) != 0) 823 if (strcmp(call->class->system, system->name) != 0)
643 continue; 824 continue;
644 825
645 err = init_preds(call); 826 filter_disable(call);
646 if (err) 827 remove_filter_string(call->filter);
647 return err;
648 } 828 }
649
650 return 0;
651} 829}
652 830
653static void filter_free_subsystem_preds(struct event_subsystem *system) 831static void filter_free_subsystem_filters(struct event_subsystem *system)
654{ 832{
655 struct ftrace_event_call *call; 833 struct ftrace_event_call *call;
656 834
657 list_for_each_entry(call, &ftrace_events, list) { 835 list_for_each_entry(call, &ftrace_events, list) {
658 if (strcmp(call->class->system, system->name) != 0) 836 if (strcmp(call->class->system, system->name) != 0)
659 continue; 837 continue;
660 838 __free_filter(call->filter);
661 filter_disable_preds(call); 839 call->filter = NULL;
662 remove_filter_string(call->filter);
663 } 840 }
664} 841}
665 842
@@ -667,18 +844,19 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
667 struct ftrace_event_call *call, 844 struct ftrace_event_call *call,
668 struct event_filter *filter, 845 struct event_filter *filter,
669 struct filter_pred *pred, 846 struct filter_pred *pred,
847 struct pred_stack *stack,
670 filter_pred_fn_t fn) 848 filter_pred_fn_t fn)
671{ 849{
672 int idx, err; 850 int idx, err;
673 851
674 if (filter->n_preds == MAX_FILTER_PRED) { 852 if (WARN_ON(filter->n_preds == filter->a_preds)) {
675 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); 853 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
676 return -ENOSPC; 854 return -ENOSPC;
677 } 855 }
678 856
679 idx = filter->n_preds; 857 idx = filter->n_preds;
680 filter_clear_pred(filter->preds[idx]); 858 filter_clear_pred(&filter->preds[idx]);
681 err = filter_set_pred(filter->preds[idx], pred, fn); 859 err = filter_set_pred(filter, idx, stack, pred, fn);
682 if (err) 860 if (err)
683 return err; 861 return err;
684 862
@@ -763,6 +941,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
763 struct ftrace_event_call *call, 941 struct ftrace_event_call *call,
764 struct event_filter *filter, 942 struct event_filter *filter,
765 struct filter_pred *pred, 943 struct filter_pred *pred,
944 struct pred_stack *stack,
766 bool dry_run) 945 bool dry_run)
767{ 946{
768 struct ftrace_event_field *field; 947 struct ftrace_event_field *field;
@@ -770,17 +949,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
770 unsigned long long val; 949 unsigned long long val;
771 int ret; 950 int ret;
772 951
773 pred->fn = filter_pred_none; 952 fn = pred->fn = filter_pred_none;
774 953
775 if (pred->op == OP_AND) { 954 if (pred->op == OP_AND)
776 pred->pop_n = 2;
777 fn = filter_pred_and;
778 goto add_pred_fn; 955 goto add_pred_fn;
779 } else if (pred->op == OP_OR) { 956 else if (pred->op == OP_OR)
780 pred->pop_n = 2;
781 fn = filter_pred_or;
782 goto add_pred_fn; 957 goto add_pred_fn;
783 }
784 958
785 field = find_event_field(call, pred->field_name); 959 field = find_event_field(call, pred->field_name);
786 if (!field) { 960 if (!field) {
@@ -829,7 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
829 1003
830add_pred_fn: 1004add_pred_fn:
831 if (!dry_run) 1005 if (!dry_run)
832 return filter_add_pred_fn(ps, call, filter, pred, fn); 1006 return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
833 return 0; 1007 return 0;
834} 1008}
835 1009
@@ -1187,6 +1361,234 @@ static int check_preds(struct filter_parse_state *ps)
1187 return 0; 1361 return 0;
1188} 1362}
1189 1363
1364static int count_preds(struct filter_parse_state *ps)
1365{
1366 struct postfix_elt *elt;
1367 int n_preds = 0;
1368
1369 list_for_each_entry(elt, &ps->postfix, list) {
1370 if (elt->op == OP_NONE)
1371 continue;
1372 n_preds++;
1373 }
1374
1375 return n_preds;
1376}
1377
1378/*
1379 * The tree is walked at filtering of an event. If the tree is not correctly
1380 * built, it may cause an infinite loop. Check here that the tree does
1381 * indeed terminate.
1382 */
1383static int check_pred_tree(struct event_filter *filter,
1384 struct filter_pred *root)
1385{
1386 struct filter_pred *preds;
1387 struct filter_pred *pred;
1388 enum move_type move = MOVE_DOWN;
1389 int count = 0;
1390 int done = 0;
1391 int max;
1392
1393 /*
1394 * The max that we can hit a node is three times.
1395 * Once going down, once coming up from left, and
1396 * once coming up from right. This is more than enough
1397 * since leafs are only hit a single time.
1398 */
1399 max = 3 * filter->n_preds;
1400
1401 preds = filter->preds;
1402 if (!preds)
1403 return -EINVAL;
1404 pred = root;
1405
1406 do {
1407 if (WARN_ON(count++ > max))
1408 return -EINVAL;
1409
1410 switch (move) {
1411 case MOVE_DOWN:
1412 if (pred->left != FILTER_PRED_INVALID) {
1413 pred = &preds[pred->left];
1414 continue;
1415 }
1416 /* A leaf at the root is just a leaf in the tree */
1417 if (pred == root)
1418 break;
1419 pred = get_pred_parent(pred, preds,
1420 pred->parent, &move);
1421 continue;
1422 case MOVE_UP_FROM_LEFT:
1423 pred = &preds[pred->right];
1424 move = MOVE_DOWN;
1425 continue;
1426 case MOVE_UP_FROM_RIGHT:
1427 if (pred == root)
1428 break;
1429 pred = get_pred_parent(pred, preds,
1430 pred->parent, &move);
1431 continue;
1432 }
1433 done = 1;
1434 } while (!done);
1435
1436 /* We are fine. */
1437 return 0;
1438}
1439
1440static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
1441{
1442 struct filter_pred *pred;
1443 enum move_type move = MOVE_DOWN;
1444 int count = 0;
1445 int done = 0;
1446
1447 pred = root;
1448
1449 do {
1450 switch (move) {
1451 case MOVE_DOWN:
1452 if (pred->left != FILTER_PRED_INVALID) {
1453 pred = &preds[pred->left];
1454 continue;
1455 }
1456 /* A leaf at the root is just a leaf in the tree */
1457 if (pred == root)
1458 return 1;
1459 count++;
1460 pred = get_pred_parent(pred, preds,
1461 pred->parent, &move);
1462 continue;
1463 case MOVE_UP_FROM_LEFT:
1464 pred = &preds[pred->right];
1465 move = MOVE_DOWN;
1466 continue;
1467 case MOVE_UP_FROM_RIGHT:
1468 if (pred == root)
1469 break;
1470 pred = get_pred_parent(pred, preds,
1471 pred->parent, &move);
1472 continue;
1473 }
1474 done = 1;
1475 } while (!done);
1476
1477 return count;
1478}
1479
1480static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
1481{
1482 struct filter_pred *pred;
1483 enum move_type move = MOVE_DOWN;
1484 int count = 0;
1485 int children;
1486 int done = 0;
1487
1488 /* No need to keep the fold flag */
1489 root->index &= ~FILTER_PRED_FOLD;
1490
1491 /* If the root is a leaf then do nothing */
1492 if (root->left == FILTER_PRED_INVALID)
1493 return 0;
1494
1495 /* count the children */
1496 children = count_leafs(preds, &preds[root->left]);
1497 children += count_leafs(preds, &preds[root->right]);
1498
1499 root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL);
1500 if (!root->ops)
1501 return -ENOMEM;
1502
1503 root->val = children;
1504
1505 pred = root;
1506 do {
1507 switch (move) {
1508 case MOVE_DOWN:
1509 if (pred->left != FILTER_PRED_INVALID) {
1510 pred = &preds[pred->left];
1511 continue;
1512 }
1513 if (WARN_ON(count == children))
1514 return -EINVAL;
1515 pred->index &= ~FILTER_PRED_FOLD;
1516 root->ops[count++] = pred->index;
1517 pred = get_pred_parent(pred, preds,
1518 pred->parent, &move);
1519 continue;
1520 case MOVE_UP_FROM_LEFT:
1521 pred = &preds[pred->right];
1522 move = MOVE_DOWN;
1523 continue;
1524 case MOVE_UP_FROM_RIGHT:
1525 if (pred == root)
1526 break;
1527 pred = get_pred_parent(pred, preds,
1528 pred->parent, &move);
1529 continue;
1530 }
1531 done = 1;
1532 } while (!done);
1533
1534 return 0;
1535}
1536
1537/*
1538 * To optimize the processing of the ops, if we have several "ors" or
1539 * "ands" together, we can put them in an array and process them all
1540 * together speeding up the filter logic.
1541 */
1542static int fold_pred_tree(struct event_filter *filter,
1543 struct filter_pred *root)
1544{
1545 struct filter_pred *preds;
1546 struct filter_pred *pred;
1547 enum move_type move = MOVE_DOWN;
1548 int done = 0;
1549 int err;
1550
1551 preds = filter->preds;
1552 if (!preds)
1553 return -EINVAL;
1554 pred = root;
1555
1556 do {
1557 switch (move) {
1558 case MOVE_DOWN:
1559 if (pred->index & FILTER_PRED_FOLD) {
1560 err = fold_pred(preds, pred);
1561 if (err)
1562 return err;
1563 /* Folded nodes are like leafs */
1564 } else if (pred->left != FILTER_PRED_INVALID) {
1565 pred = &preds[pred->left];
1566 continue;
1567 }
1568
1569 /* A leaf at the root is just a leaf in the tree */
1570 if (pred == root)
1571 break;
1572 pred = get_pred_parent(pred, preds,
1573 pred->parent, &move);
1574 continue;
1575 case MOVE_UP_FROM_LEFT:
1576 pred = &preds[pred->right];
1577 move = MOVE_DOWN;
1578 continue;
1579 case MOVE_UP_FROM_RIGHT:
1580 if (pred == root)
1581 break;
1582 pred = get_pred_parent(pred, preds,
1583 pred->parent, &move);
1584 continue;
1585 }
1586 done = 1;
1587 } while (!done);
1588
1589 return 0;
1590}
1591
1190static int replace_preds(struct ftrace_event_call *call, 1592static int replace_preds(struct ftrace_event_call *call,
1191 struct event_filter *filter, 1593 struct event_filter *filter,
1192 struct filter_parse_state *ps, 1594 struct filter_parse_state *ps,
@@ -1195,14 +1597,32 @@ static int replace_preds(struct ftrace_event_call *call,
1195{ 1597{
1196 char *operand1 = NULL, *operand2 = NULL; 1598 char *operand1 = NULL, *operand2 = NULL;
1197 struct filter_pred *pred; 1599 struct filter_pred *pred;
1600 struct filter_pred *root;
1198 struct postfix_elt *elt; 1601 struct postfix_elt *elt;
1602 struct pred_stack stack = { }; /* init to NULL */
1199 int err; 1603 int err;
1200 int n_preds = 0; 1604 int n_preds = 0;
1201 1605
1606 n_preds = count_preds(ps);
1607 if (n_preds >= MAX_FILTER_PRED) {
1608 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1609 return -ENOSPC;
1610 }
1611
1202 err = check_preds(ps); 1612 err = check_preds(ps);
1203 if (err) 1613 if (err)
1204 return err; 1614 return err;
1205 1615
1616 if (!dry_run) {
1617 err = __alloc_pred_stack(&stack, n_preds);
1618 if (err)
1619 return err;
1620 err = __alloc_preds(filter, n_preds);
1621 if (err)
1622 goto fail;
1623 }
1624
1625 n_preds = 0;
1206 list_for_each_entry(elt, &ps->postfix, list) { 1626 list_for_each_entry(elt, &ps->postfix, list) {
1207 if (elt->op == OP_NONE) { 1627 if (elt->op == OP_NONE) {
1208 if (!operand1) 1628 if (!operand1)
@@ -1211,14 +1631,16 @@ static int replace_preds(struct ftrace_event_call *call,
1211 operand2 = elt->operand; 1631 operand2 = elt->operand;
1212 else { 1632 else {
1213 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); 1633 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
1214 return -EINVAL; 1634 err = -EINVAL;
1635 goto fail;
1215 } 1636 }
1216 continue; 1637 continue;
1217 } 1638 }
1218 1639
1219 if (n_preds++ == MAX_FILTER_PRED) { 1640 if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
1220 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); 1641 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
1221 return -ENOSPC; 1642 err = -ENOSPC;
1643 goto fail;
1222 } 1644 }
1223 1645
1224 if (elt->op == OP_AND || elt->op == OP_OR) { 1646 if (elt->op == OP_AND || elt->op == OP_OR) {
@@ -1228,76 +1650,181 @@ static int replace_preds(struct ftrace_event_call *call,
1228 1650
1229 if (!operand1 || !operand2) { 1651 if (!operand1 || !operand2) {
1230 parse_error(ps, FILT_ERR_MISSING_FIELD, 0); 1652 parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
1231 return -EINVAL; 1653 err = -EINVAL;
1654 goto fail;
1232 } 1655 }
1233 1656
1234 pred = create_pred(elt->op, operand1, operand2); 1657 pred = create_pred(elt->op, operand1, operand2);
1235add_pred: 1658add_pred:
1236 if (!pred) 1659 if (!pred) {
1237 return -ENOMEM; 1660 err = -ENOMEM;
1238 err = filter_add_pred(ps, call, filter, pred, dry_run); 1661 goto fail;
1662 }
1663 err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
1239 filter_free_pred(pred); 1664 filter_free_pred(pred);
1240 if (err) 1665 if (err)
1241 return err; 1666 goto fail;
1242 1667
1243 operand1 = operand2 = NULL; 1668 operand1 = operand2 = NULL;
1244 } 1669 }
1245 1670
1246 return 0; 1671 if (!dry_run) {
1672 /* We should have one item left on the stack */
1673 pred = __pop_pred_stack(&stack);
1674 if (!pred)
1675 return -EINVAL;
1676 /* This item is where we start from in matching */
1677 root = pred;
1678 /* Make sure the stack is empty */
1679 pred = __pop_pred_stack(&stack);
1680 if (WARN_ON(pred)) {
1681 err = -EINVAL;
1682 filter->root = NULL;
1683 goto fail;
1684 }
1685 err = check_pred_tree(filter, root);
1686 if (err)
1687 goto fail;
1688
1689 /* Optimize the tree */
1690 err = fold_pred_tree(filter, root);
1691 if (err)
1692 goto fail;
1693
1694 /* We don't set root until we know it works */
1695 barrier();
1696 filter->root = root;
1697 }
1698
1699 err = 0;
1700fail:
1701 __free_pred_stack(&stack);
1702 return err;
1247} 1703}
1248 1704
1705struct filter_list {
1706 struct list_head list;
1707 struct event_filter *filter;
1708};
1709
1249static int replace_system_preds(struct event_subsystem *system, 1710static int replace_system_preds(struct event_subsystem *system,
1250 struct filter_parse_state *ps, 1711 struct filter_parse_state *ps,
1251 char *filter_string) 1712 char *filter_string)
1252{ 1713{
1253 struct ftrace_event_call *call; 1714 struct ftrace_event_call *call;
1715 struct filter_list *filter_item;
1716 struct filter_list *tmp;
1717 LIST_HEAD(filter_list);
1254 bool fail = true; 1718 bool fail = true;
1255 int err; 1719 int err;
1256 1720
1257 list_for_each_entry(call, &ftrace_events, list) { 1721 list_for_each_entry(call, &ftrace_events, list) {
1258 struct event_filter *filter = call->filter;
1259 1722
1260 if (strcmp(call->class->system, system->name) != 0) 1723 if (strcmp(call->class->system, system->name) != 0)
1261 continue; 1724 continue;
1262 1725
1263 /* try to see if the filter can be applied */ 1726 /*
1264 err = replace_preds(call, filter, ps, filter_string, true); 1727 * Try to see if the filter can be applied
1728 * (filter arg is ignored on dry_run)
1729 */
1730 err = replace_preds(call, NULL, ps, filter_string, true);
1265 if (err) 1731 if (err)
1732 goto fail;
1733 }
1734
1735 list_for_each_entry(call, &ftrace_events, list) {
1736 struct event_filter *filter;
1737
1738 if (strcmp(call->class->system, system->name) != 0)
1266 continue; 1739 continue;
1267 1740
1268 /* really apply the filter */ 1741 filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
1269 filter_disable_preds(call); 1742 if (!filter_item)
1270 err = replace_preds(call, filter, ps, filter_string, false); 1743 goto fail_mem;
1744
1745 list_add_tail(&filter_item->list, &filter_list);
1746
1747 filter_item->filter = __alloc_filter();
1748 if (!filter_item->filter)
1749 goto fail_mem;
1750 filter = filter_item->filter;
1751
1752 /* Can only fail on no memory */
1753 err = replace_filter_string(filter, filter_string);
1271 if (err) 1754 if (err)
1272 filter_disable_preds(call); 1755 goto fail_mem;
1273 else { 1756
1757 err = replace_preds(call, filter, ps, filter_string, false);
1758 if (err) {
1759 filter_disable(call);
1760 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1761 append_filter_err(ps, filter);
1762 } else
1274 call->flags |= TRACE_EVENT_FL_FILTERED; 1763 call->flags |= TRACE_EVENT_FL_FILTERED;
1275 replace_filter_string(filter, filter_string); 1764 /*
1276 } 1765 * Regardless of if this returned an error, we still
1766 * replace the filter for the call.
1767 */
1768 filter = call->filter;
1769 call->filter = filter_item->filter;
1770 filter_item->filter = filter;
1771
1277 fail = false; 1772 fail = false;
1278 } 1773 }
1279 1774
1280 if (fail) { 1775 if (fail)
1281 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); 1776 goto fail;
1282 return -EINVAL; 1777
1778 /*
1779 * The calls can still be using the old filters.
1780 * Do a synchronize_sched() to ensure all calls are
1781 * done with them before we free them.
1782 */
1783 synchronize_sched();
1784 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1785 __free_filter(filter_item->filter);
1786 list_del(&filter_item->list);
1787 kfree(filter_item);
1283 } 1788 }
1284 return 0; 1789 return 0;
1790 fail:
1791 /* No call succeeded */
1792 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1793 list_del(&filter_item->list);
1794 kfree(filter_item);
1795 }
1796 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
1797 return -EINVAL;
1798 fail_mem:
1799 /* If any call succeeded, we still need to sync */
1800 if (!fail)
1801 synchronize_sched();
1802 list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
1803 __free_filter(filter_item->filter);
1804 list_del(&filter_item->list);
1805 kfree(filter_item);
1806 }
1807 return -ENOMEM;
1285} 1808}
1286 1809
1287int apply_event_filter(struct ftrace_event_call *call, char *filter_string) 1810int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1288{ 1811{
1289 int err;
1290 struct filter_parse_state *ps; 1812 struct filter_parse_state *ps;
1813 struct event_filter *filter;
1814 struct event_filter *tmp;
1815 int err = 0;
1291 1816
1292 mutex_lock(&event_mutex); 1817 mutex_lock(&event_mutex);
1293 1818
1294 err = init_preds(call);
1295 if (err)
1296 goto out_unlock;
1297
1298 if (!strcmp(strstrip(filter_string), "0")) { 1819 if (!strcmp(strstrip(filter_string), "0")) {
1299 filter_disable_preds(call); 1820 filter_disable(call);
1300 remove_filter_string(call->filter); 1821 filter = call->filter;
1822 if (!filter)
1823 goto out_unlock;
1824 call->filter = NULL;
1825 /* Make sure the filter is not being used */
1826 synchronize_sched();
1827 __free_filter(filter);
1301 goto out_unlock; 1828 goto out_unlock;
1302 } 1829 }
1303 1830
@@ -1306,22 +1833,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1306 if (!ps) 1833 if (!ps)
1307 goto out_unlock; 1834 goto out_unlock;
1308 1835
1309 filter_disable_preds(call); 1836 filter = __alloc_filter();
1310 replace_filter_string(call->filter, filter_string); 1837 if (!filter) {
1838 kfree(ps);
1839 goto out_unlock;
1840 }
1841
1842 replace_filter_string(filter, filter_string);
1311 1843
1312 parse_init(ps, filter_ops, filter_string); 1844 parse_init(ps, filter_ops, filter_string);
1313 err = filter_parse(ps); 1845 err = filter_parse(ps);
1314 if (err) { 1846 if (err) {
1315 append_filter_err(ps, call->filter); 1847 append_filter_err(ps, filter);
1316 goto out; 1848 goto out;
1317 } 1849 }
1318 1850
1319 err = replace_preds(call, call->filter, ps, filter_string, false); 1851 err = replace_preds(call, filter, ps, filter_string, false);
1320 if (err) 1852 if (err) {
1321 append_filter_err(ps, call->filter); 1853 filter_disable(call);
1322 else 1854 append_filter_err(ps, filter);
1855 } else
1323 call->flags |= TRACE_EVENT_FL_FILTERED; 1856 call->flags |= TRACE_EVENT_FL_FILTERED;
1324out: 1857out:
1858 /*
1859 * Always swap the call filter with the new filter
1860 * even if there was an error. If there was an error
1861 * in the filter, we disable the filter and show the error
1862 * string
1863 */
1864 tmp = call->filter;
1865 call->filter = filter;
1866 if (tmp) {
1867 /* Make sure the call is done with the filter */
1868 synchronize_sched();
1869 __free_filter(tmp);
1870 }
1325 filter_opstack_clear(ps); 1871 filter_opstack_clear(ps);
1326 postfix_clear(ps); 1872 postfix_clear(ps);
1327 kfree(ps); 1873 kfree(ps);
@@ -1334,18 +1880,21 @@ out_unlock:
1334int apply_subsystem_event_filter(struct event_subsystem *system, 1880int apply_subsystem_event_filter(struct event_subsystem *system,
1335 char *filter_string) 1881 char *filter_string)
1336{ 1882{
1337 int err;
1338 struct filter_parse_state *ps; 1883 struct filter_parse_state *ps;
1884 struct event_filter *filter;
1885 int err = 0;
1339 1886
1340 mutex_lock(&event_mutex); 1887 mutex_lock(&event_mutex);
1341 1888
1342 err = init_subsystem_preds(system);
1343 if (err)
1344 goto out_unlock;
1345
1346 if (!strcmp(strstrip(filter_string), "0")) { 1889 if (!strcmp(strstrip(filter_string), "0")) {
1347 filter_free_subsystem_preds(system); 1890 filter_free_subsystem_preds(system);
1348 remove_filter_string(system->filter); 1891 remove_filter_string(system->filter);
1892 filter = system->filter;
1893 system->filter = NULL;
1894 /* Ensure all filters are no longer used */
1895 synchronize_sched();
1896 filter_free_subsystem_filters(system);
1897 __free_filter(filter);
1349 goto out_unlock; 1898 goto out_unlock;
1350 } 1899 }
1351 1900
@@ -1354,7 +1903,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1354 if (!ps) 1903 if (!ps)
1355 goto out_unlock; 1904 goto out_unlock;
1356 1905
1357 replace_filter_string(system->filter, filter_string); 1906 filter = __alloc_filter();
1907 if (!filter)
1908 goto out;
1909
1910 replace_filter_string(filter, filter_string);
1911 /*
1912 * No event actually uses the system filter
1913 * we can free it without synchronize_sched().
1914 */
1915 __free_filter(system->filter);
1916 system->filter = filter;
1358 1917
1359 parse_init(ps, filter_ops, filter_string); 1918 parse_init(ps, filter_ops, filter_string);
1360 err = filter_parse(ps); 1919 err = filter_parse(ps);
@@ -1384,7 +1943,7 @@ void ftrace_profile_free_filter(struct perf_event *event)
1384 struct event_filter *filter = event->filter; 1943 struct event_filter *filter = event->filter;
1385 1944
1386 event->filter = NULL; 1945 event->filter = NULL;
1387 __free_preds(filter); 1946 __free_filter(filter);
1388} 1947}
1389 1948
1390int ftrace_profile_set_filter(struct perf_event *event, int event_id, 1949int ftrace_profile_set_filter(struct perf_event *event, int event_id,
@@ -1410,8 +1969,8 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1410 if (event->filter) 1969 if (event->filter)
1411 goto out_unlock; 1970 goto out_unlock;
1412 1971
1413 filter = __alloc_preds(); 1972 filter = __alloc_filter();
1414 if (IS_ERR(filter)) { 1973 if (!filter) {
1415 err = PTR_ERR(filter); 1974 err = PTR_ERR(filter);
1416 goto out_unlock; 1975 goto out_unlock;
1417 } 1976 }
@@ -1419,7 +1978,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1419 err = -ENOMEM; 1978 err = -ENOMEM;
1420 ps = kzalloc(sizeof(*ps), GFP_KERNEL); 1979 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1421 if (!ps) 1980 if (!ps)
1422 goto free_preds; 1981 goto free_filter;
1423 1982
1424 parse_init(ps, filter_ops, filter_str); 1983 parse_init(ps, filter_ops, filter_str);
1425 err = filter_parse(ps); 1984 err = filter_parse(ps);
@@ -1435,9 +1994,9 @@ free_ps:
1435 postfix_clear(ps); 1994 postfix_clear(ps);
1436 kfree(ps); 1995 kfree(ps);
1437 1996
1438free_preds: 1997free_filter:
1439 if (err) 1998 if (err)
1440 __free_preds(filter); 1999 __free_filter(filter);
1441 2000
1442out_unlock: 2001out_unlock:
1443 mutex_unlock(&event_mutex); 2002 mutex_unlock(&event_mutex);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2dec9bcde8b4..8435b43b1782 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -353,6 +353,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
353 kfree(data); 353 kfree(data);
354} 354}
355 355
356/* Bitfield fetch function */
357struct bitfield_fetch_param {
358 struct fetch_param orig;
359 unsigned char hi_shift;
360 unsigned char low_shift;
361};
362
363#define DEFINE_FETCH_bitfield(type) \
364static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
365 void *data, void *dest) \
366{ \
367 struct bitfield_fetch_param *bprm = data; \
368 type buf = 0; \
369 call_fetch(&bprm->orig, regs, &buf); \
370 if (buf) { \
371 buf <<= bprm->hi_shift; \
372 buf >>= bprm->low_shift; \
373 } \
374 *(type *)dest = buf; \
375}
376DEFINE_BASIC_FETCH_FUNCS(bitfield)
377#define fetch_bitfield_string NULL
378#define fetch_bitfield_string_size NULL
379
380static __kprobes void
381free_bitfield_fetch_param(struct bitfield_fetch_param *data)
382{
383 /*
384 * Don't check the bitfield itself, because this must be the
385 * last fetch function.
386 */
387 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
388 free_deref_fetch_param(data->orig.data);
389 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
390 free_symbol_cache(data->orig.data);
391 kfree(data);
392}
356/* Default (unsigned long) fetch type */ 393/* Default (unsigned long) fetch type */
357#define __DEFAULT_FETCH_TYPE(t) u##t 394#define __DEFAULT_FETCH_TYPE(t) u##t
358#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) 395#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -367,6 +404,7 @@ enum {
367 FETCH_MTD_memory, 404 FETCH_MTD_memory,
368 FETCH_MTD_symbol, 405 FETCH_MTD_symbol,
369 FETCH_MTD_deref, 406 FETCH_MTD_deref,
407 FETCH_MTD_bitfield,
370 FETCH_MTD_END, 408 FETCH_MTD_END,
371}; 409};
372 410
@@ -387,6 +425,7 @@ ASSIGN_FETCH_FUNC(retval, ftype), \
387ASSIGN_FETCH_FUNC(memory, ftype), \ 425ASSIGN_FETCH_FUNC(memory, ftype), \
388ASSIGN_FETCH_FUNC(symbol, ftype), \ 426ASSIGN_FETCH_FUNC(symbol, ftype), \
389ASSIGN_FETCH_FUNC(deref, ftype), \ 427ASSIGN_FETCH_FUNC(deref, ftype), \
428ASSIGN_FETCH_FUNC(bitfield, ftype), \
390 } \ 429 } \
391 } 430 }
392 431
@@ -430,9 +469,33 @@ static const struct fetch_type *find_fetch_type(const char *type)
430 if (!type) 469 if (!type)
431 type = DEFAULT_FETCH_TYPE_STR; 470 type = DEFAULT_FETCH_TYPE_STR;
432 471
472 /* Special case: bitfield */
473 if (*type == 'b') {
474 unsigned long bs;
475 type = strchr(type, '/');
476 if (!type)
477 goto fail;
478 type++;
479 if (strict_strtoul(type, 0, &bs))
480 goto fail;
481 switch (bs) {
482 case 8:
483 return find_fetch_type("u8");
484 case 16:
485 return find_fetch_type("u16");
486 case 32:
487 return find_fetch_type("u32");
488 case 64:
489 return find_fetch_type("u64");
490 default:
491 goto fail;
492 }
493 }
494
433 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++) 495 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
434 if (strcmp(type, fetch_type_table[i].name) == 0) 496 if (strcmp(type, fetch_type_table[i].name) == 0)
435 return &fetch_type_table[i]; 497 return &fetch_type_table[i];
498fail:
436 return NULL; 499 return NULL;
437} 500}
438 501
@@ -586,7 +649,9 @@ error:
586 649
587static void free_probe_arg(struct probe_arg *arg) 650static void free_probe_arg(struct probe_arg *arg)
588{ 651{
589 if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn)) 652 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
653 free_bitfield_fetch_param(arg->fetch.data);
654 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
590 free_deref_fetch_param(arg->fetch.data); 655 free_deref_fetch_param(arg->fetch.data);
591 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn)) 656 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
592 free_symbol_cache(arg->fetch.data); 657 free_symbol_cache(arg->fetch.data);
@@ -767,16 +832,15 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
767 } 832 }
768 break; 833 break;
769 case '+': /* deref memory */ 834 case '+': /* deref memory */
835 arg++; /* Skip '+', because strict_strtol() rejects it. */
770 case '-': 836 case '-':
771 tmp = strchr(arg, '('); 837 tmp = strchr(arg, '(');
772 if (!tmp) 838 if (!tmp)
773 break; 839 break;
774 *tmp = '\0'; 840 *tmp = '\0';
775 ret = strict_strtol(arg + 1, 0, &offset); 841 ret = strict_strtol(arg, 0, &offset);
776 if (ret) 842 if (ret)
777 break; 843 break;
778 if (arg[0] == '-')
779 offset = -offset;
780 arg = tmp + 1; 844 arg = tmp + 1;
781 tmp = strrchr(arg, ')'); 845 tmp = strrchr(arg, ')');
782 if (tmp) { 846 if (tmp) {
@@ -807,6 +871,41 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
807 return ret; 871 return ret;
808} 872}
809 873
874#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
875
876/* Bitfield type needs to be parsed into a fetch function */
877static int __parse_bitfield_probe_arg(const char *bf,
878 const struct fetch_type *t,
879 struct fetch_param *f)
880{
881 struct bitfield_fetch_param *bprm;
882 unsigned long bw, bo;
883 char *tail;
884
885 if (*bf != 'b')
886 return 0;
887
888 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
889 if (!bprm)
890 return -ENOMEM;
891 bprm->orig = *f;
892 f->fn = t->fetch[FETCH_MTD_bitfield];
893 f->data = (void *)bprm;
894
895 bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
896 if (bw == 0 || *tail != '@')
897 return -EINVAL;
898
899 bf = tail + 1;
900 bo = simple_strtoul(bf, &tail, 0);
901 if (tail == bf || *tail != '/')
902 return -EINVAL;
903
904 bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
905 bprm->low_shift = bprm->hi_shift + bo;
906 return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
907}
908
810/* String length checking wrapper */ 909/* String length checking wrapper */
811static int parse_probe_arg(char *arg, struct trace_probe *tp, 910static int parse_probe_arg(char *arg, struct trace_probe *tp,
812 struct probe_arg *parg, int is_return) 911 struct probe_arg *parg, int is_return)
@@ -836,6 +935,8 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
836 parg->offset = tp->size; 935 parg->offset = tp->size;
837 tp->size += parg->type->size; 936 tp->size += parg->type->size;
838 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); 937 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
938 if (ret >= 0 && t != NULL)
939 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
839 if (ret >= 0) { 940 if (ret >= 0) {
840 parg->fetch_size.fn = get_fetch_size_function(parg->type, 941 parg->fetch_size.fn = get_fetch_size_function(parg->type,
841 parg->fetch.fn); 942 parg->fetch.fn);
@@ -1130,7 +1231,7 @@ static int command_trace_probe(const char *buf)
1130 return ret; 1231 return ret;
1131} 1232}
1132 1233
1133#define WRITE_BUFSIZE 128 1234#define WRITE_BUFSIZE 4096
1134 1235
1135static ssize_t probes_write(struct file *file, const char __user *buffer, 1236static ssize_t probes_write(struct file *file, const char __user *buffer,
1136 size_t count, loff_t *ppos) 1237 size_t count, loff_t *ppos)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 02272baa2206..456be9063c2d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -529,24 +529,34 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
529 * @entry: The trace entry field from the ring buffer 529 * @entry: The trace entry field from the ring buffer
530 * 530 *
531 * Prints the generic fields of irqs off, in hard or softirq, preempt 531 * Prints the generic fields of irqs off, in hard or softirq, preempt
532 * count and lock depth. 532 * count.
533 */ 533 */
534int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) 534int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
535{ 535{
536 int hardirq, softirq; 536 char hardsoft_irq;
537 char need_resched;
538 char irqs_off;
539 int hardirq;
540 int softirq;
537 int ret; 541 int ret;
538 542
539 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 543 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
540 softirq = entry->flags & TRACE_FLAG_SOFTIRQ; 544 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
541 545
546 irqs_off =
547 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
548 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
549 '.';
550 need_resched =
551 (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
552 hardsoft_irq =
553 (hardirq && softirq) ? 'H' :
554 hardirq ? 'h' :
555 softirq ? 's' :
556 '.';
557
542 if (!trace_seq_printf(s, "%c%c%c", 558 if (!trace_seq_printf(s, "%c%c%c",
543 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : 559 irqs_off, need_resched, hardsoft_irq))
544 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
545 'X' : '.',
546 (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
547 'N' : '.',
548 (hardirq && softirq) ? 'H' :
549 hardirq ? 'h' : softirq ? 's' : '.'))
550 return 0; 560 return 0;
551 561
552 if (entry->preempt_count) 562 if (entry->preempt_count)
@@ -554,13 +564,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
554 else 564 else
555 ret = trace_seq_putc(s, '.'); 565 ret = trace_seq_putc(s, '.');
556 566
557 if (!ret) 567 return ret;
558 return 0;
559
560 if (entry->lock_depth < 0)
561 return trace_seq_putc(s, '.');
562
563 return trace_seq_printf(s, "%d", entry->lock_depth);
564} 568}
565 569
566static int 570static int
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 8f758d070c43..7e62c0a18456 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -247,51 +247,3 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
247 ctx_trace = tr; 247 ctx_trace = tr;
248} 248}
249 249
250static void stop_sched_trace(struct trace_array *tr)
251{
252 tracing_stop_sched_switch_record();
253}
254
255static int sched_switch_trace_init(struct trace_array *tr)
256{
257 ctx_trace = tr;
258 tracing_reset_online_cpus(tr);
259 tracing_start_sched_switch_record();
260 return 0;
261}
262
263static void sched_switch_trace_reset(struct trace_array *tr)
264{
265 if (sched_ref)
266 stop_sched_trace(tr);
267}
268
269static void sched_switch_trace_start(struct trace_array *tr)
270{
271 sched_stopped = 0;
272}
273
274static void sched_switch_trace_stop(struct trace_array *tr)
275{
276 sched_stopped = 1;
277}
278
279static struct tracer sched_switch_trace __read_mostly =
280{
281 .name = "sched_switch",
282 .init = sched_switch_trace_init,
283 .reset = sched_switch_trace_reset,
284 .start = sched_switch_trace_start,
285 .stop = sched_switch_trace_stop,
286 .wait_pipe = poll_wait_pipe,
287#ifdef CONFIG_FTRACE_SELFTEST
288 .selftest = trace_selftest_startup_sched_switch,
289#endif
290};
291
292__init static int init_sched_switch_trace(void)
293{
294 return register_tracer(&sched_switch_trace);
295}
296device_initcall(init_sched_switch_trace);
297
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5c9fe08d2093..ee7b5a0bb9f8 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -60,6 +60,19 @@ extern struct syscall_metadata *__stop_syscalls_metadata[];
60 60
61static struct syscall_metadata **syscalls_metadata; 61static struct syscall_metadata **syscalls_metadata;
62 62
63#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
64static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
65{
66 /*
67 * Only compare after the "sys" prefix. Archs that use
68 * syscall wrappers may have syscalls symbols aliases prefixed
69 * with "SyS" instead of "sys", leading to an unwanted
70 * mismatch.
71 */
72 return !strcmp(sym + 3, name + 3);
73}
74#endif
75
63static __init struct syscall_metadata * 76static __init struct syscall_metadata *
64find_syscall_meta(unsigned long syscall) 77find_syscall_meta(unsigned long syscall)
65{ 78{
@@ -72,14 +85,11 @@ find_syscall_meta(unsigned long syscall)
72 stop = __stop_syscalls_metadata; 85 stop = __stop_syscalls_metadata;
73 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 86 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
74 87
88 if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
89 return NULL;
90
75 for ( ; start < stop; start++) { 91 for ( ; start < stop; start++) {
76 /* 92 if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
77 * Only compare after the "sys" prefix. Archs that use
78 * syscall wrappers may have syscalls symbols aliases prefixed
79 * with "SyS" instead of "sys", leading to an unwanted
80 * mismatch.
81 */
82 if ((*start)->name && !strcmp((*start)->name + 3, str + 3))
83 return *start; 93 return *start;
84 } 94 }
85 return NULL; 95 return NULL;
@@ -359,7 +369,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
359 int num; 369 int num;
360 370
361 num = ((struct syscall_metadata *)call->data)->syscall_nr; 371 num = ((struct syscall_metadata *)call->data)->syscall_nr;
362 if (num < 0 || num >= NR_syscalls) 372 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
363 return -ENOSYS; 373 return -ENOSYS;
364 mutex_lock(&syscall_trace_lock); 374 mutex_lock(&syscall_trace_lock);
365 if (!sys_refcount_enter) 375 if (!sys_refcount_enter)
@@ -377,7 +387,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
377 int num; 387 int num;
378 388
379 num = ((struct syscall_metadata *)call->data)->syscall_nr; 389 num = ((struct syscall_metadata *)call->data)->syscall_nr;
380 if (num < 0 || num >= NR_syscalls) 390 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
381 return; 391 return;
382 mutex_lock(&syscall_trace_lock); 392 mutex_lock(&syscall_trace_lock);
383 sys_refcount_enter--; 393 sys_refcount_enter--;
@@ -393,7 +403,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
393 int num; 403 int num;
394 404
395 num = ((struct syscall_metadata *)call->data)->syscall_nr; 405 num = ((struct syscall_metadata *)call->data)->syscall_nr;
396 if (num < 0 || num >= NR_syscalls) 406 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
397 return -ENOSYS; 407 return -ENOSYS;
398 mutex_lock(&syscall_trace_lock); 408 mutex_lock(&syscall_trace_lock);
399 if (!sys_refcount_exit) 409 if (!sys_refcount_exit)
@@ -411,7 +421,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
411 int num; 421 int num;
412 422
413 num = ((struct syscall_metadata *)call->data)->syscall_nr; 423 num = ((struct syscall_metadata *)call->data)->syscall_nr;
414 if (num < 0 || num >= NR_syscalls) 424 if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
415 return; 425 return;
416 mutex_lock(&syscall_trace_lock); 426 mutex_lock(&syscall_trace_lock);
417 sys_refcount_exit--; 427 sys_refcount_exit--;
@@ -424,6 +434,14 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
424int init_syscall_trace(struct ftrace_event_call *call) 434int init_syscall_trace(struct ftrace_event_call *call)
425{ 435{
426 int id; 436 int id;
437 int num;
438
439 num = ((struct syscall_metadata *)call->data)->syscall_nr;
440 if (num < 0 || num >= NR_syscalls) {
441 pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
442 ((struct syscall_metadata *)call->data)->name);
443 return -ENOSYS;
444 }
427 445
428 if (set_syscall_print_fmt(call) < 0) 446 if (set_syscall_print_fmt(call) < 0)
429 return -ENOMEM; 447 return -ENOMEM;
@@ -438,7 +456,7 @@ int init_syscall_trace(struct ftrace_event_call *call)
438 return id; 456 return id;
439} 457}
440 458
441unsigned long __init arch_syscall_addr(int nr) 459unsigned long __init __weak arch_syscall_addr(int nr)
442{ 460{
443 return (unsigned long)sys_call_table[nr]; 461 return (unsigned long)sys_call_table[nr];
444} 462}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ee6578b578ad..b5fe4c00eb3c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -316,6 +316,11 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
316 316
317static struct debug_obj_descr work_debug_descr; 317static struct debug_obj_descr work_debug_descr;
318 318
319static void *work_debug_hint(void *addr)
320{
321 return ((struct work_struct *) addr)->func;
322}
323
319/* 324/*
320 * fixup_init is called when: 325 * fixup_init is called when:
321 * - an active object is initialized 326 * - an active object is initialized
@@ -387,6 +392,7 @@ static int work_fixup_free(void *addr, enum debug_obj_state state)
387 392
388static struct debug_obj_descr work_debug_descr = { 393static struct debug_obj_descr work_debug_descr = {
389 .name = "work_struct", 394 .name = "work_struct",
395 .debug_hint = work_debug_hint,
390 .fixup_init = work_fixup_init, 396 .fixup_init = work_fixup_init,
391 .fixup_activate = work_fixup_activate, 397 .fixup_activate = work_fixup_activate,
392 .fixup_free = work_fixup_free, 398 .fixup_free = work_fixup_free,
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index deebcc57d4e6..9d86e45086f5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr)
249 249
250static void debug_print_object(struct debug_obj *obj, char *msg) 250static void debug_print_object(struct debug_obj *obj, char *msg)
251{ 251{
252 struct debug_obj_descr *descr = obj->descr;
252 static int limit; 253 static int limit;
253 254
254 if (limit < 5 && obj->descr != descr_test) { 255 if (limit < 5 && descr != descr_test) {
256 void *hint = descr->debug_hint ?
257 descr->debug_hint(obj->object) : NULL;
255 limit++; 258 limit++;
256 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " 259 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
257 "object type: %s\n", 260 "object type: %s hint: %pS\n",
258 msg, obj_states[obj->state], obj->astate, 261 msg, obj_states[obj->state], obj->astate,
259 obj->descr->name); 262 descr->name, hint);
260 } 263 }
261 debug_objects_warnings++; 264 debug_objects_warnings++;
262} 265}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 5021cbc34411..ac09f2226dc7 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -148,7 +148,7 @@ nla_policy_len(const struct nla_policy *p, int n)
148{ 148{
149 int i, len = 0; 149 int i, len = 0;
150 150
151 for (i = 0; i < n; i++) { 151 for (i = 0; i < n; i++, p++) {
152 if (p->len) 152 if (p->len)
153 len += nla_total_size(p->len); 153 len += nla_total_size(p->len);
154 else if (nla_attr_minlen[p->type]) 154 else if (nla_attr_minlen[p->type])
diff --git a/lib/plist.c b/lib/plist.c
index 1471988d9190..0ae7e6431726 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -28,6 +28,8 @@
28 28
29#ifdef CONFIG_DEBUG_PI_LIST 29#ifdef CONFIG_DEBUG_PI_LIST
30 30
31static struct plist_head test_head;
32
31static void plist_check_prev_next(struct list_head *t, struct list_head *p, 33static void plist_check_prev_next(struct list_head *t, struct list_head *p,
32 struct list_head *n) 34 struct list_head *n)
33{ 35{
@@ -54,12 +56,13 @@ static void plist_check_list(struct list_head *top)
54 56
55static void plist_check_head(struct plist_head *head) 57static void plist_check_head(struct plist_head *head)
56{ 58{
57 WARN_ON(!head->rawlock && !head->spinlock); 59 WARN_ON(head != &test_head && !head->rawlock && !head->spinlock);
58 if (head->rawlock) 60 if (head->rawlock)
59 WARN_ON_SMP(!raw_spin_is_locked(head->rawlock)); 61 WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
60 if (head->spinlock) 62 if (head->spinlock)
61 WARN_ON_SMP(!spin_is_locked(head->spinlock)); 63 WARN_ON_SMP(!spin_is_locked(head->spinlock));
62 plist_check_list(&head->prio_list); 64 if (!plist_head_empty(head))
65 plist_check_list(&plist_first(head)->prio_list);
63 plist_check_list(&head->node_list); 66 plist_check_list(&head->node_list);
64} 67}
65 68
@@ -75,25 +78,33 @@ static void plist_check_head(struct plist_head *head)
75 */ 78 */
76void plist_add(struct plist_node *node, struct plist_head *head) 79void plist_add(struct plist_node *node, struct plist_head *head)
77{ 80{
78 struct plist_node *iter; 81 struct plist_node *first, *iter, *prev = NULL;
82 struct list_head *node_next = &head->node_list;
79 83
80 plist_check_head(head); 84 plist_check_head(head);
81 WARN_ON(!plist_node_empty(node)); 85 WARN_ON(!plist_node_empty(node));
86 WARN_ON(!list_empty(&node->prio_list));
87
88 if (plist_head_empty(head))
89 goto ins_node;
82 90
83 list_for_each_entry(iter, &head->prio_list, plist.prio_list) { 91 first = iter = plist_first(head);
84 if (node->prio < iter->prio) 92
85 goto lt_prio; 93 do {
86 else if (node->prio == iter->prio) { 94 if (node->prio < iter->prio) {
87 iter = list_entry(iter->plist.prio_list.next, 95 node_next = &iter->node_list;
88 struct plist_node, plist.prio_list); 96 break;
89 goto eq_prio;
90 } 97 }
91 }
92 98
93lt_prio: 99 prev = iter;
94 list_add_tail(&node->plist.prio_list, &iter->plist.prio_list); 100 iter = list_entry(iter->prio_list.next,
95eq_prio: 101 struct plist_node, prio_list);
96 list_add_tail(&node->plist.node_list, &iter->plist.node_list); 102 } while (iter != first);
103
104 if (!prev || prev->prio != node->prio)
105 list_add_tail(&node->prio_list, &iter->prio_list);
106ins_node:
107 list_add_tail(&node->node_list, node_next);
97 108
98 plist_check_head(head); 109 plist_check_head(head);
99} 110}
@@ -108,14 +119,98 @@ void plist_del(struct plist_node *node, struct plist_head *head)
108{ 119{
109 plist_check_head(head); 120 plist_check_head(head);
110 121
111 if (!list_empty(&node->plist.prio_list)) { 122 if (!list_empty(&node->prio_list)) {
112 struct plist_node *next = plist_first(&node->plist); 123 if (node->node_list.next != &head->node_list) {
124 struct plist_node *next;
125
126 next = list_entry(node->node_list.next,
127 struct plist_node, node_list);
113 128
114 list_move_tail(&next->plist.prio_list, &node->plist.prio_list); 129 /* add the next plist_node into prio_list */
115 list_del_init(&node->plist.prio_list); 130 if (list_empty(&next->prio_list))
131 list_add(&next->prio_list, &node->prio_list);
132 }
133 list_del_init(&node->prio_list);
116 } 134 }
117 135
118 list_del_init(&node->plist.node_list); 136 list_del_init(&node->node_list);
119 137
120 plist_check_head(head); 138 plist_check_head(head);
121} 139}
140
141#ifdef CONFIG_DEBUG_PI_LIST
142#include <linux/sched.h>
143#include <linux/module.h>
144#include <linux/init.h>
145
146static struct plist_node __initdata test_node[241];
147
148static void __init plist_test_check(int nr_expect)
149{
150 struct plist_node *first, *prio_pos, *node_pos;
151
152 if (plist_head_empty(&test_head)) {
153 BUG_ON(nr_expect != 0);
154 return;
155 }
156
157 prio_pos = first = plist_first(&test_head);
158 plist_for_each(node_pos, &test_head) {
159 if (nr_expect-- < 0)
160 break;
161 if (node_pos == first)
162 continue;
163 if (node_pos->prio == prio_pos->prio) {
164 BUG_ON(!list_empty(&node_pos->prio_list));
165 continue;
166 }
167
168 BUG_ON(prio_pos->prio > node_pos->prio);
169 BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list);
170 prio_pos = node_pos;
171 }
172
173 BUG_ON(nr_expect != 0);
174 BUG_ON(prio_pos->prio_list.next != &first->prio_list);
175}
176
177static int __init plist_test(void)
178{
179 int nr_expect = 0, i, loop;
180 unsigned int r = local_clock();
181
182 printk(KERN_INFO "start plist test\n");
183 plist_head_init(&test_head, NULL);
184 for (i = 0; i < ARRAY_SIZE(test_node); i++)
185 plist_node_init(test_node + i, 0);
186
187 for (loop = 0; loop < 1000; loop++) {
188 r = r * 193939 % 47629;
189 i = r % ARRAY_SIZE(test_node);
190 if (plist_node_empty(test_node + i)) {
191 r = r * 193939 % 47629;
192 test_node[i].prio = r % 99;
193 plist_add(test_node + i, &test_head);
194 nr_expect++;
195 } else {
196 plist_del(test_node + i, &test_head);
197 nr_expect--;
198 }
199 plist_test_check(nr_expect);
200 }
201
202 for (i = 0; i < ARRAY_SIZE(test_node); i++) {
203 if (plist_node_empty(test_node + i))
204 continue;
205 plist_del(test_node + i, &test_head);
206 nr_expect--;
207 plist_test_check(nr_expect);
208 }
209
210 printk(KERN_INFO "end plist test\n");
211 return 0;
212}
213
214module_init(plist_test);
215
216#endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index f236d7cd5cf3..aa7c3052261f 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
222/* 222/*
223 * wait for the read lock to be granted 223 * wait for the read lock to be granted
224 */ 224 */
225asmregparm struct rw_semaphore __sched * 225struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
226rwsem_down_read_failed(struct rw_semaphore *sem)
227{ 226{
228 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, 227 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
229 -RWSEM_ACTIVE_READ_BIAS); 228 -RWSEM_ACTIVE_READ_BIAS);
@@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
232/* 231/*
233 * wait for the write lock to be granted 232 * wait for the write lock to be granted
234 */ 233 */
235asmregparm struct rw_semaphore __sched * 234struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
236rwsem_down_write_failed(struct rw_semaphore *sem)
237{ 235{
238 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, 236 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
239 -RWSEM_ACTIVE_WRITE_BIAS); 237 -RWSEM_ACTIVE_WRITE_BIAS);
@@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
243 * handle waking up a waiter on the semaphore 241 * handle waking up a waiter on the semaphore
244 * - up_read/up_write has decremented the active part of count if we come here 242 * - up_read/up_write has decremented the active part of count if we come here
245 */ 243 */
246asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) 244struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
247{ 245{
248 unsigned long flags; 246 unsigned long flags;
249 247
@@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
263 * - caller incremented waiting part of count and discovered it still negative 261 * - caller incremented waiting part of count and discovered it still negative
264 * - just wake up any readers at the front of the queue 262 * - just wake up any readers at the front of the queue
265 */ 263 */
266asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) 264struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
267{ 265{
268 unsigned long flags; 266 unsigned long flags;
269 267
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c47bbe11b804..93ca08b8a451 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -686,8 +686,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
686 /* 686 /*
687 * Ensure that the address returned is DMA'ble 687 * Ensure that the address returned is DMA'ble
688 */ 688 */
689 if (!dma_capable(dev, dev_addr, size)) 689 if (!dma_capable(dev, dev_addr, size)) {
690 panic("map_single: bounce buffer is not DMA'ble"); 690 swiotlb_tbl_unmap_single(dev, map, size, dir);
691 dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
692 }
691 693
692 return dev_addr; 694 return dev_addr;
693} 695}
diff --git a/mm/Makefile b/mm/Makefile
index 2b1b575ae712..42a8326c3e3d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -7,7 +7,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ 7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
8 vmalloc.o pagewalk.o pgtable-generic.o 8 vmalloc.o pagewalk.o pgtable-generic.o
9 9
10obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ 10obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
11 maccess.o page_alloc.o page-writeback.o \ 11 maccess.o page_alloc.o page-writeback.o \
12 readahead.o swap.o truncate.o vmscan.o shmem.o \ 12 readahead.o swap.o truncate.o vmscan.o shmem.o \
13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 13 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
@@ -15,6 +15,12 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
15 $(mmu-y) 15 $(mmu-y)
16obj-y += init-mm.o 16obj-y += init-mm.o
17 17
18ifdef CONFIG_NO_BOOTMEM
19 obj-y += nobootmem.o
20else
21 obj-y += bootmem.o
22endif
23
18obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 24obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
19 25
20obj-$(CONFIG_BOUNCE) += bounce.o 26obj-$(CONFIG_BOUNCE) += bounce.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 13b0caa9793c..07aeb89e396e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -23,6 +23,13 @@
23 23
24#include "internal.h" 24#include "internal.h"
25 25
26#ifndef CONFIG_NEED_MULTIPLE_NODES
27struct pglist_data __refdata contig_page_data = {
28 .bdata = &bootmem_node_data[0]
29};
30EXPORT_SYMBOL(contig_page_data);
31#endif
32
26unsigned long max_low_pfn; 33unsigned long max_low_pfn;
27unsigned long min_low_pfn; 34unsigned long min_low_pfn;
28unsigned long max_pfn; 35unsigned long max_pfn;
@@ -35,7 +42,6 @@ unsigned long max_pfn;
35unsigned long saved_max_pfn; 42unsigned long saved_max_pfn;
36#endif 43#endif
37 44
38#ifndef CONFIG_NO_BOOTMEM
39bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; 45bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
40 46
41static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); 47static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
@@ -146,7 +152,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
146 min_low_pfn = start; 152 min_low_pfn = start;
147 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); 153 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
148} 154}
149#endif 155
150/* 156/*
151 * free_bootmem_late - free bootmem pages directly to page allocator 157 * free_bootmem_late - free bootmem pages directly to page allocator
152 * @addr: starting address of the range 158 * @addr: starting address of the range
@@ -171,53 +177,6 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
171 } 177 }
172} 178}
173 179
174#ifdef CONFIG_NO_BOOTMEM
175static void __init __free_pages_memory(unsigned long start, unsigned long end)
176{
177 int i;
178 unsigned long start_aligned, end_aligned;
179 int order = ilog2(BITS_PER_LONG);
180
181 start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
182 end_aligned = end & ~(BITS_PER_LONG - 1);
183
184 if (end_aligned <= start_aligned) {
185 for (i = start; i < end; i++)
186 __free_pages_bootmem(pfn_to_page(i), 0);
187
188 return;
189 }
190
191 for (i = start; i < start_aligned; i++)
192 __free_pages_bootmem(pfn_to_page(i), 0);
193
194 for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
195 __free_pages_bootmem(pfn_to_page(i), order);
196
197 for (i = end_aligned; i < end; i++)
198 __free_pages_bootmem(pfn_to_page(i), 0);
199}
200
201unsigned long __init free_all_memory_core_early(int nodeid)
202{
203 int i;
204 u64 start, end;
205 unsigned long count = 0;
206 struct range *range = NULL;
207 int nr_range;
208
209 nr_range = get_free_all_memory_range(&range, nodeid);
210
211 for (i = 0; i < nr_range; i++) {
212 start = range[i].start;
213 end = range[i].end;
214 count += end - start;
215 __free_pages_memory(start, end);
216 }
217
218 return count;
219}
220#else
221static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) 180static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
222{ 181{
223 int aligned; 182 int aligned;
@@ -278,7 +237,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
278 237
279 return count; 238 return count;
280} 239}
281#endif
282 240
283/** 241/**
284 * free_all_bootmem_node - release a node's free pages to the buddy allocator 242 * free_all_bootmem_node - release a node's free pages to the buddy allocator
@@ -289,12 +247,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
289unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) 247unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
290{ 248{
291 register_page_bootmem_info_node(pgdat); 249 register_page_bootmem_info_node(pgdat);
292#ifdef CONFIG_NO_BOOTMEM
293 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
294 return 0;
295#else
296 return free_all_bootmem_core(pgdat->bdata); 250 return free_all_bootmem_core(pgdat->bdata);
297#endif
298} 251}
299 252
300/** 253/**
@@ -304,16 +257,6 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
304 */ 257 */
305unsigned long __init free_all_bootmem(void) 258unsigned long __init free_all_bootmem(void)
306{ 259{
307#ifdef CONFIG_NO_BOOTMEM
308 /*
309 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
310 * because in some case like Node0 doesnt have RAM installed
311 * low ram will be on Node1
312 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
313 * will be used instead of only Node0 related
314 */
315 return free_all_memory_core_early(MAX_NUMNODES);
316#else
317 unsigned long total_pages = 0; 260 unsigned long total_pages = 0;
318 bootmem_data_t *bdata; 261 bootmem_data_t *bdata;
319 262
@@ -321,10 +264,8 @@ unsigned long __init free_all_bootmem(void)
321 total_pages += free_all_bootmem_core(bdata); 264 total_pages += free_all_bootmem_core(bdata);
322 265
323 return total_pages; 266 return total_pages;
324#endif
325} 267}
326 268
327#ifndef CONFIG_NO_BOOTMEM
328static void __init __free(bootmem_data_t *bdata, 269static void __init __free(bootmem_data_t *bdata,
329 unsigned long sidx, unsigned long eidx) 270 unsigned long sidx, unsigned long eidx)
330{ 271{
@@ -419,7 +360,6 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
419 } 360 }
420 BUG(); 361 BUG();
421} 362}
422#endif
423 363
424/** 364/**
425 * free_bootmem_node - mark a page range as usable 365 * free_bootmem_node - mark a page range as usable
@@ -434,10 +374,6 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
434void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 374void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
435 unsigned long size) 375 unsigned long size)
436{ 376{
437#ifdef CONFIG_NO_BOOTMEM
438 kmemleak_free_part(__va(physaddr), size);
439 memblock_x86_free_range(physaddr, physaddr + size);
440#else
441 unsigned long start, end; 377 unsigned long start, end;
442 378
443 kmemleak_free_part(__va(physaddr), size); 379 kmemleak_free_part(__va(physaddr), size);
@@ -446,7 +382,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
446 end = PFN_DOWN(physaddr + size); 382 end = PFN_DOWN(physaddr + size);
447 383
448 mark_bootmem_node(pgdat->bdata, start, end, 0, 0); 384 mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
449#endif
450} 385}
451 386
452/** 387/**
@@ -460,10 +395,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
460 */ 395 */
461void __init free_bootmem(unsigned long addr, unsigned long size) 396void __init free_bootmem(unsigned long addr, unsigned long size)
462{ 397{
463#ifdef CONFIG_NO_BOOTMEM
464 kmemleak_free_part(__va(addr), size);
465 memblock_x86_free_range(addr, addr + size);
466#else
467 unsigned long start, end; 398 unsigned long start, end;
468 399
469 kmemleak_free_part(__va(addr), size); 400 kmemleak_free_part(__va(addr), size);
@@ -472,7 +403,6 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
472 end = PFN_DOWN(addr + size); 403 end = PFN_DOWN(addr + size);
473 404
474 mark_bootmem(start, end, 0, 0); 405 mark_bootmem(start, end, 0, 0);
475#endif
476} 406}
477 407
478/** 408/**
@@ -489,17 +419,12 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
489int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 419int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
490 unsigned long size, int flags) 420 unsigned long size, int flags)
491{ 421{
492#ifdef CONFIG_NO_BOOTMEM
493 panic("no bootmem");
494 return 0;
495#else
496 unsigned long start, end; 422 unsigned long start, end;
497 423
498 start = PFN_DOWN(physaddr); 424 start = PFN_DOWN(physaddr);
499 end = PFN_UP(physaddr + size); 425 end = PFN_UP(physaddr + size);
500 426
501 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); 427 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
502#endif
503} 428}
504 429
505/** 430/**
@@ -515,20 +440,14 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
515int __init reserve_bootmem(unsigned long addr, unsigned long size, 440int __init reserve_bootmem(unsigned long addr, unsigned long size,
516 int flags) 441 int flags)
517{ 442{
518#ifdef CONFIG_NO_BOOTMEM
519 panic("no bootmem");
520 return 0;
521#else
522 unsigned long start, end; 443 unsigned long start, end;
523 444
524 start = PFN_DOWN(addr); 445 start = PFN_DOWN(addr);
525 end = PFN_UP(addr + size); 446 end = PFN_UP(addr + size);
526 447
527 return mark_bootmem(start, end, 1, flags); 448 return mark_bootmem(start, end, 1, flags);
528#endif
529} 449}
530 450
531#ifndef CONFIG_NO_BOOTMEM
532int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len, 451int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
533 int flags) 452 int flags)
534{ 453{
@@ -685,33 +604,12 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
685#endif 604#endif
686 return NULL; 605 return NULL;
687} 606}
688#endif
689 607
690static void * __init ___alloc_bootmem_nopanic(unsigned long size, 608static void * __init ___alloc_bootmem_nopanic(unsigned long size,
691 unsigned long align, 609 unsigned long align,
692 unsigned long goal, 610 unsigned long goal,
693 unsigned long limit) 611 unsigned long limit)
694{ 612{
695#ifdef CONFIG_NO_BOOTMEM
696 void *ptr;
697
698 if (WARN_ON_ONCE(slab_is_available()))
699 return kzalloc(size, GFP_NOWAIT);
700
701restart:
702
703 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
704
705 if (ptr)
706 return ptr;
707
708 if (goal != 0) {
709 goal = 0;
710 goto restart;
711 }
712
713 return NULL;
714#else
715 bootmem_data_t *bdata; 613 bootmem_data_t *bdata;
716 void *region; 614 void *region;
717 615
@@ -737,7 +635,6 @@ restart:
737 } 635 }
738 636
739 return NULL; 637 return NULL;
740#endif
741} 638}
742 639
743/** 640/**
@@ -758,10 +655,6 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
758{ 655{
759 unsigned long limit = 0; 656 unsigned long limit = 0;
760 657
761#ifdef CONFIG_NO_BOOTMEM
762 limit = -1UL;
763#endif
764
765 return ___alloc_bootmem_nopanic(size, align, goal, limit); 658 return ___alloc_bootmem_nopanic(size, align, goal, limit);
766} 659}
767 660
@@ -798,14 +691,9 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
798{ 691{
799 unsigned long limit = 0; 692 unsigned long limit = 0;
800 693
801#ifdef CONFIG_NO_BOOTMEM
802 limit = -1UL;
803#endif
804
805 return ___alloc_bootmem(size, align, goal, limit); 694 return ___alloc_bootmem(size, align, goal, limit);
806} 695}
807 696
808#ifndef CONFIG_NO_BOOTMEM
809static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, 697static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
810 unsigned long size, unsigned long align, 698 unsigned long size, unsigned long align,
811 unsigned long goal, unsigned long limit) 699 unsigned long goal, unsigned long limit)
@@ -822,7 +710,6 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
822 710
823 return ___alloc_bootmem(size, align, goal, limit); 711 return ___alloc_bootmem(size, align, goal, limit);
824} 712}
825#endif
826 713
827/** 714/**
828 * __alloc_bootmem_node - allocate boot memory from a specific node 715 * __alloc_bootmem_node - allocate boot memory from a specific node
@@ -842,24 +729,10 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
842void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, 729void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
843 unsigned long align, unsigned long goal) 730 unsigned long align, unsigned long goal)
844{ 731{
845 void *ptr;
846
847 if (WARN_ON_ONCE(slab_is_available())) 732 if (WARN_ON_ONCE(slab_is_available()))
848 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 733 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
849 734
850#ifdef CONFIG_NO_BOOTMEM 735 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
851 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
852 goal, -1ULL);
853 if (ptr)
854 return ptr;
855
856 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
857 goal, -1ULL);
858#else
859 ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
860#endif
861
862 return ptr;
863} 736}
864 737
865void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, 738void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -880,13 +753,8 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
880 unsigned long new_goal; 753 unsigned long new_goal;
881 754
882 new_goal = MAX_DMA32_PFN << PAGE_SHIFT; 755 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
883#ifdef CONFIG_NO_BOOTMEM
884 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
885 new_goal, -1ULL);
886#else
887 ptr = alloc_bootmem_core(pgdat->bdata, size, align, 756 ptr = alloc_bootmem_core(pgdat->bdata, size, align,
888 new_goal, 0); 757 new_goal, 0);
889#endif
890 if (ptr) 758 if (ptr)
891 return ptr; 759 return ptr;
892 } 760 }
@@ -907,16 +775,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
907void * __init alloc_bootmem_section(unsigned long size, 775void * __init alloc_bootmem_section(unsigned long size,
908 unsigned long section_nr) 776 unsigned long section_nr)
909{ 777{
910#ifdef CONFIG_NO_BOOTMEM
911 unsigned long pfn, goal, limit;
912
913 pfn = section_nr_to_pfn(section_nr);
914 goal = pfn << PAGE_SHIFT;
915 limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
916
917 return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
918 SMP_CACHE_BYTES, goal, limit);
919#else
920 bootmem_data_t *bdata; 778 bootmem_data_t *bdata;
921 unsigned long pfn, goal, limit; 779 unsigned long pfn, goal, limit;
922 780
@@ -926,7 +784,6 @@ void * __init alloc_bootmem_section(unsigned long size,
926 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; 784 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
927 785
928 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); 786 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
929#endif
930} 787}
931#endif 788#endif
932 789
@@ -938,16 +795,11 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
938 if (WARN_ON_ONCE(slab_is_available())) 795 if (WARN_ON_ONCE(slab_is_available()))
939 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 796 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
940 797
941#ifdef CONFIG_NO_BOOTMEM
942 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
943 goal, -1ULL);
944#else
945 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); 798 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
946 if (ptr) 799 if (ptr)
947 return ptr; 800 return ptr;
948 801
949 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); 802 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
950#endif
951 if (ptr) 803 if (ptr)
952 return ptr; 804 return ptr;
953 805
@@ -995,21 +847,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
995void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 847void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
996 unsigned long align, unsigned long goal) 848 unsigned long align, unsigned long goal)
997{ 849{
998 void *ptr;
999
1000 if (WARN_ON_ONCE(slab_is_available())) 850 if (WARN_ON_ONCE(slab_is_available()))
1001 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 851 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
1002 852
1003#ifdef CONFIG_NO_BOOTMEM 853 return ___alloc_bootmem_node(pgdat->bdata, size, align,
1004 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
1005 goal, ARCH_LOW_ADDRESS_LIMIT); 854 goal, ARCH_LOW_ADDRESS_LIMIT);
1006 if (ptr)
1007 return ptr;
1008 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
1009 goal, ARCH_LOW_ADDRESS_LIMIT);
1010#else
1011 ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
1012 goal, ARCH_LOW_ADDRESS_LIMIT);
1013#endif
1014 return ptr;
1015} 855}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3e29781ee762..113e35c47502 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
650 650
651static inline struct page *alloc_hugepage_vma(int defrag, 651static inline struct page *alloc_hugepage_vma(int defrag,
652 struct vm_area_struct *vma, 652 struct vm_area_struct *vma,
653 unsigned long haddr) 653 unsigned long haddr, int nd)
654{ 654{
655 return alloc_pages_vma(alloc_hugepage_gfpmask(defrag), 655 return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
656 HPAGE_PMD_ORDER, vma, haddr); 656 HPAGE_PMD_ORDER, vma, haddr, nd);
657} 657}
658 658
659#ifndef CONFIG_NUMA 659#ifndef CONFIG_NUMA
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
678 if (unlikely(khugepaged_enter(vma))) 678 if (unlikely(khugepaged_enter(vma)))
679 return VM_FAULT_OOM; 679 return VM_FAULT_OOM;
680 page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), 680 page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
681 vma, haddr); 681 vma, haddr, numa_node_id());
682 if (unlikely(!page)) 682 if (unlikely(!page))
683 goto out; 683 goto out;
684 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { 684 if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -799,8 +799,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
799 } 799 }
800 800
801 for (i = 0; i < HPAGE_PMD_NR; i++) { 801 for (i = 0; i < HPAGE_PMD_NR; i++) {
802 pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE, 802 pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE,
803 vma, address); 803 vma, address, page_to_nid(page));
804 if (unlikely(!pages[i] || 804 if (unlikely(!pages[i] ||
805 mem_cgroup_newpage_charge(pages[i], mm, 805 mem_cgroup_newpage_charge(pages[i], mm,
806 GFP_KERNEL))) { 806 GFP_KERNEL))) {
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
902 if (transparent_hugepage_enabled(vma) && 902 if (transparent_hugepage_enabled(vma) &&
903 !transparent_hugepage_debug_cow()) 903 !transparent_hugepage_debug_cow())
904 new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), 904 new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
905 vma, haddr); 905 vma, haddr, numa_node_id());
906 else 906 else
907 new_page = NULL; 907 new_page = NULL;
908 908
@@ -1745,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
1745static void collapse_huge_page(struct mm_struct *mm, 1745static void collapse_huge_page(struct mm_struct *mm,
1746 unsigned long address, 1746 unsigned long address,
1747 struct page **hpage, 1747 struct page **hpage,
1748 struct vm_area_struct *vma) 1748 struct vm_area_struct *vma,
1749 int node)
1749{ 1750{
1750 pgd_t *pgd; 1751 pgd_t *pgd;
1751 pud_t *pud; 1752 pud_t *pud;
@@ -1761,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm,
1761#ifndef CONFIG_NUMA 1762#ifndef CONFIG_NUMA
1762 VM_BUG_ON(!*hpage); 1763 VM_BUG_ON(!*hpage);
1763 new_page = *hpage; 1764 new_page = *hpage;
1765 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
1766 up_read(&mm->mmap_sem);
1767 return;
1768 }
1764#else 1769#else
1765 VM_BUG_ON(*hpage); 1770 VM_BUG_ON(*hpage);
1766 /* 1771 /*
@@ -1773,18 +1778,19 @@ static void collapse_huge_page(struct mm_struct *mm,
1773 * mmap_sem in read mode is good idea also to allow greater 1778 * mmap_sem in read mode is good idea also to allow greater
1774 * scalability. 1779 * scalability.
1775 */ 1780 */
1776 new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address); 1781 new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
1782 node);
1777 if (unlikely(!new_page)) { 1783 if (unlikely(!new_page)) {
1778 up_read(&mm->mmap_sem); 1784 up_read(&mm->mmap_sem);
1779 *hpage = ERR_PTR(-ENOMEM); 1785 *hpage = ERR_PTR(-ENOMEM);
1780 return; 1786 return;
1781 } 1787 }
1782#endif
1783 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { 1788 if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
1784 up_read(&mm->mmap_sem); 1789 up_read(&mm->mmap_sem);
1785 put_page(new_page); 1790 put_page(new_page);
1786 return; 1791 return;
1787 } 1792 }
1793#endif
1788 1794
1789 /* after allocating the hugepage upgrade to mmap_sem write mode */ 1795 /* after allocating the hugepage upgrade to mmap_sem write mode */
1790 up_read(&mm->mmap_sem); 1796 up_read(&mm->mmap_sem);
@@ -1919,6 +1925,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1919 struct page *page; 1925 struct page *page;
1920 unsigned long _address; 1926 unsigned long _address;
1921 spinlock_t *ptl; 1927 spinlock_t *ptl;
1928 int node = -1;
1922 1929
1923 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 1930 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
1924 1931
@@ -1949,6 +1956,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
1949 page = vm_normal_page(vma, _address, pteval); 1956 page = vm_normal_page(vma, _address, pteval);
1950 if (unlikely(!page)) 1957 if (unlikely(!page))
1951 goto out_unmap; 1958 goto out_unmap;
1959 /*
1960 * Chose the node of the first page. This could
1961 * be more sophisticated and look at more pages,
1962 * but isn't for now.
1963 */
1964 if (node == -1)
1965 node = page_to_nid(page);
1952 VM_BUG_ON(PageCompound(page)); 1966 VM_BUG_ON(PageCompound(page));
1953 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) 1967 if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
1954 goto out_unmap; 1968 goto out_unmap;
@@ -1965,7 +1979,7 @@ out_unmap:
1965 pte_unmap_unlock(pte, ptl); 1979 pte_unmap_unlock(pte, ptl);
1966 if (ret) 1980 if (ret)
1967 /* collapse_huge_page will return with the mmap_sem released */ 1981 /* collapse_huge_page will return with the mmap_sem released */
1968 collapse_huge_page(mm, address, hpage, vma); 1982 collapse_huge_page(mm, address, hpage, vma, node);
1969out: 1983out:
1970 return ret; 1984 return ret;
1971} 1985}
diff --git a/mm/memory.c b/mm/memory.c
index 8e8c18324863..5823698c2b71 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2648,6 +2648,7 @@ void unmap_mapping_range(struct address_space *mapping,
2648 details.last_index = ULONG_MAX; 2648 details.last_index = ULONG_MAX;
2649 details.i_mmap_lock = &mapping->i_mmap_lock; 2649 details.i_mmap_lock = &mapping->i_mmap_lock;
2650 2650
2651 mutex_lock(&mapping->unmap_mutex);
2651 spin_lock(&mapping->i_mmap_lock); 2652 spin_lock(&mapping->i_mmap_lock);
2652 2653
2653 /* Protect against endless unmapping loops */ 2654 /* Protect against endless unmapping loops */
@@ -2664,6 +2665,7 @@ void unmap_mapping_range(struct address_space *mapping,
2664 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) 2665 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
2665 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); 2666 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
2666 spin_unlock(&mapping->i_mmap_lock); 2667 spin_unlock(&mapping->i_mmap_lock);
2668 mutex_unlock(&mapping->unmap_mutex);
2667} 2669}
2668EXPORT_SYMBOL(unmap_mapping_range); 2670EXPORT_SYMBOL(unmap_mapping_range);
2669 2671
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 368fc9d23610..b53ec99f1428 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1524,10 +1524,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
1524} 1524}
1525 1525
1526/* Return a zonelist indicated by gfp for node representing a mempolicy */ 1526/* Return a zonelist indicated by gfp for node representing a mempolicy */
1527static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy) 1527static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
1528 int nd)
1528{ 1529{
1529 int nd = numa_node_id();
1530
1531 switch (policy->mode) { 1530 switch (policy->mode) {
1532 case MPOL_PREFERRED: 1531 case MPOL_PREFERRED:
1533 if (!(policy->flags & MPOL_F_LOCAL)) 1532 if (!(policy->flags & MPOL_F_LOCAL))
@@ -1679,7 +1678,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
1679 zl = node_zonelist(interleave_nid(*mpol, vma, addr, 1678 zl = node_zonelist(interleave_nid(*mpol, vma, addr,
1680 huge_page_shift(hstate_vma(vma))), gfp_flags); 1679 huge_page_shift(hstate_vma(vma))), gfp_flags);
1681 } else { 1680 } else {
1682 zl = policy_zonelist(gfp_flags, *mpol); 1681 zl = policy_zonelist(gfp_flags, *mpol, numa_node_id());
1683 if ((*mpol)->mode == MPOL_BIND) 1682 if ((*mpol)->mode == MPOL_BIND)
1684 *nodemask = &(*mpol)->v.nodes; 1683 *nodemask = &(*mpol)->v.nodes;
1685 } 1684 }
@@ -1820,7 +1819,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
1820 */ 1819 */
1821struct page * 1820struct page *
1822alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, 1821alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
1823 unsigned long addr) 1822 unsigned long addr, int node)
1824{ 1823{
1825 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1824 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1826 struct zonelist *zl; 1825 struct zonelist *zl;
@@ -1830,13 +1829,13 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
1830 if (unlikely(pol->mode == MPOL_INTERLEAVE)) { 1829 if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
1831 unsigned nid; 1830 unsigned nid;
1832 1831
1833 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); 1832 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
1834 mpol_cond_put(pol); 1833 mpol_cond_put(pol);
1835 page = alloc_page_interleave(gfp, order, nid); 1834 page = alloc_page_interleave(gfp, order, nid);
1836 put_mems_allowed(); 1835 put_mems_allowed();
1837 return page; 1836 return page;
1838 } 1837 }
1839 zl = policy_zonelist(gfp, pol); 1838 zl = policy_zonelist(gfp, pol, node);
1840 if (unlikely(mpol_needs_cond_ref(pol))) { 1839 if (unlikely(mpol_needs_cond_ref(pol))) {
1841 /* 1840 /*
1842 * slow path: ref counted shared policy 1841 * slow path: ref counted shared policy
@@ -1892,7 +1891,8 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
1892 page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); 1891 page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
1893 else 1892 else
1894 page = __alloc_pages_nodemask(gfp, order, 1893 page = __alloc_pages_nodemask(gfp, order,
1895 policy_zonelist(gfp, pol), policy_nodemask(gfp, pol)); 1894 policy_zonelist(gfp, pol, numa_node_id()),
1895 policy_nodemask(gfp, pol));
1896 put_mems_allowed(); 1896 put_mems_allowed();
1897 return page; 1897 return page;
1898} 1898}
diff --git a/mm/migrate.c b/mm/migrate.c
index 766115253807..352de555626c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1287,14 +1287,14 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1287 return -EPERM; 1287 return -EPERM;
1288 1288
1289 /* Find the mm_struct */ 1289 /* Find the mm_struct */
1290 read_lock(&tasklist_lock); 1290 rcu_read_lock();
1291 task = pid ? find_task_by_vpid(pid) : current; 1291 task = pid ? find_task_by_vpid(pid) : current;
1292 if (!task) { 1292 if (!task) {
1293 read_unlock(&tasklist_lock); 1293 rcu_read_unlock();
1294 return -ESRCH; 1294 return -ESRCH;
1295 } 1295 }
1296 mm = get_task_mm(task); 1296 mm = get_task_mm(task);
1297 read_unlock(&tasklist_lock); 1297 rcu_read_unlock();
1298 1298
1299 if (!mm) 1299 if (!mm)
1300 return -EINVAL; 1300 return -EINVAL;
diff --git a/mm/mremap.c b/mm/mremap.c
index 9925b6391b80..1de98d492ddc 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -94,9 +94,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
94 */ 94 */
95 mapping = vma->vm_file->f_mapping; 95 mapping = vma->vm_file->f_mapping;
96 spin_lock(&mapping->i_mmap_lock); 96 spin_lock(&mapping->i_mmap_lock);
97 if (new_vma->vm_truncate_count && 97 new_vma->vm_truncate_count = 0;
98 new_vma->vm_truncate_count != vma->vm_truncate_count)
99 new_vma->vm_truncate_count = 0;
100 } 98 }
101 99
102 /* 100 /*
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
new file mode 100644
index 000000000000..e2bdb07079ce
--- /dev/null
+++ b/mm/nobootmem.c
@@ -0,0 +1,435 @@
1/*
2 * bootmem - A boot-time physical memory allocator and configurator
3 *
4 * Copyright (C) 1999 Ingo Molnar
5 * 1999 Kanoj Sarcar, SGI
6 * 2008 Johannes Weiner
7 *
8 * Access to this subsystem has to be serialized externally (which is true
9 * for the boot process anyway).
10 */
11#include <linux/init.h>
12#include <linux/pfn.h>
13#include <linux/slab.h>
14#include <linux/bootmem.h>
15#include <linux/module.h>
16#include <linux/kmemleak.h>
17#include <linux/range.h>
18#include <linux/memblock.h>
19
20#include <asm/bug.h>
21#include <asm/io.h>
22#include <asm/processor.h>
23
24#include "internal.h"
25
26#ifndef CONFIG_NEED_MULTIPLE_NODES
27struct pglist_data __refdata contig_page_data;
28EXPORT_SYMBOL(contig_page_data);
29#endif
30
31unsigned long max_low_pfn;
32unsigned long min_low_pfn;
33unsigned long max_pfn;
34
35#ifdef CONFIG_CRASH_DUMP
36/*
37 * If we have booted due to a crash, max_pfn will be a very low value. We need
38 * to know the amount of memory that the previous kernel used.
39 */
40unsigned long saved_max_pfn;
41#endif
42
43static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
44 u64 goal, u64 limit)
45{
46 void *ptr;
47 u64 addr;
48
49 if (limit > memblock.current_limit)
50 limit = memblock.current_limit;
51
52 addr = find_memory_core_early(nid, size, align, goal, limit);
53
54 if (addr == MEMBLOCK_ERROR)
55 return NULL;
56
57 ptr = phys_to_virt(addr);
58 memset(ptr, 0, size);
59 memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
60 /*
61 * The min_count is set to 0 so that bootmem allocated blocks
62 * are never reported as leaks.
63 */
64 kmemleak_alloc(ptr, size, 0, 0);
65 return ptr;
66}
67
68/*
69 * free_bootmem_late - free bootmem pages directly to page allocator
70 * @addr: starting address of the range
71 * @size: size of the range in bytes
72 *
73 * This is only useful when the bootmem allocator has already been torn
74 * down, but we are still initializing the system. Pages are given directly
75 * to the page allocator, no bootmem metadata is updated because it is gone.
76 */
77void __init free_bootmem_late(unsigned long addr, unsigned long size)
78{
79 unsigned long cursor, end;
80
81 kmemleak_free_part(__va(addr), size);
82
83 cursor = PFN_UP(addr);
84 end = PFN_DOWN(addr + size);
85
86 for (; cursor < end; cursor++) {
87 __free_pages_bootmem(pfn_to_page(cursor), 0);
88 totalram_pages++;
89 }
90}
91
92static void __init __free_pages_memory(unsigned long start, unsigned long end)
93{
94 int i;
95 unsigned long start_aligned, end_aligned;
96 int order = ilog2(BITS_PER_LONG);
97
98 start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
99 end_aligned = end & ~(BITS_PER_LONG - 1);
100
101 if (end_aligned <= start_aligned) {
102 for (i = start; i < end; i++)
103 __free_pages_bootmem(pfn_to_page(i), 0);
104
105 return;
106 }
107
108 for (i = start; i < start_aligned; i++)
109 __free_pages_bootmem(pfn_to_page(i), 0);
110
111 for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
112 __free_pages_bootmem(pfn_to_page(i), order);
113
114 for (i = end_aligned; i < end; i++)
115 __free_pages_bootmem(pfn_to_page(i), 0);
116}
117
118unsigned long __init free_all_memory_core_early(int nodeid)
119{
120 int i;
121 u64 start, end;
122 unsigned long count = 0;
123 struct range *range = NULL;
124 int nr_range;
125
126 nr_range = get_free_all_memory_range(&range, nodeid);
127
128 for (i = 0; i < nr_range; i++) {
129 start = range[i].start;
130 end = range[i].end;
131 count += end - start;
132 __free_pages_memory(start, end);
133 }
134
135 return count;
136}
137
138/**
139 * free_all_bootmem_node - release a node's free pages to the buddy allocator
140 * @pgdat: node to be released
141 *
142 * Returns the number of pages actually released.
143 */
144unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
145{
146 register_page_bootmem_info_node(pgdat);
147
148 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
149 return 0;
150}
151
152/**
153 * free_all_bootmem - release free pages to the buddy allocator
154 *
155 * Returns the number of pages actually released.
156 */
157unsigned long __init free_all_bootmem(void)
158{
159 /*
160 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
161 * because in some case like Node0 doesnt have RAM installed
162 * low ram will be on Node1
163 * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
164 * will be used instead of only Node0 related
165 */
166 return free_all_memory_core_early(MAX_NUMNODES);
167}
168
169/**
170 * free_bootmem_node - mark a page range as usable
171 * @pgdat: node the range resides on
172 * @physaddr: starting address of the range
173 * @size: size of the range in bytes
174 *
175 * Partial pages will be considered reserved and left as they are.
176 *
177 * The range must reside completely on the specified node.
178 */
179void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
180 unsigned long size)
181{
182 kmemleak_free_part(__va(physaddr), size);
183 memblock_x86_free_range(physaddr, physaddr + size);
184}
185
186/**
187 * free_bootmem - mark a page range as usable
188 * @addr: starting address of the range
189 * @size: size of the range in bytes
190 *
191 * Partial pages will be considered reserved and left as they are.
192 *
193 * The range must be contiguous but may span node boundaries.
194 */
195void __init free_bootmem(unsigned long addr, unsigned long size)
196{
197 kmemleak_free_part(__va(addr), size);
198 memblock_x86_free_range(addr, addr + size);
199}
200
201static void * __init ___alloc_bootmem_nopanic(unsigned long size,
202 unsigned long align,
203 unsigned long goal,
204 unsigned long limit)
205{
206 void *ptr;
207
208 if (WARN_ON_ONCE(slab_is_available()))
209 return kzalloc(size, GFP_NOWAIT);
210
211restart:
212
213 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
214
215 if (ptr)
216 return ptr;
217
218 if (goal != 0) {
219 goal = 0;
220 goto restart;
221 }
222
223 return NULL;
224}
225
226/**
227 * __alloc_bootmem_nopanic - allocate boot memory without panicking
228 * @size: size of the request in bytes
229 * @align: alignment of the region
230 * @goal: preferred starting address of the region
231 *
232 * The goal is dropped if it can not be satisfied and the allocation will
233 * fall back to memory below @goal.
234 *
235 * Allocation may happen on any node in the system.
236 *
237 * Returns NULL on failure.
238 */
239void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
240 unsigned long goal)
241{
242 unsigned long limit = -1UL;
243
244 return ___alloc_bootmem_nopanic(size, align, goal, limit);
245}
246
247static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
248 unsigned long goal, unsigned long limit)
249{
250 void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
251
252 if (mem)
253 return mem;
254 /*
255 * Whoops, we cannot satisfy the allocation request.
256 */
257 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
258 panic("Out of memory");
259 return NULL;
260}
261
262/**
263 * __alloc_bootmem - allocate boot memory
264 * @size: size of the request in bytes
265 * @align: alignment of the region
266 * @goal: preferred starting address of the region
267 *
268 * The goal is dropped if it can not be satisfied and the allocation will
269 * fall back to memory below @goal.
270 *
271 * Allocation may happen on any node in the system.
272 *
273 * The function panics if the request can not be satisfied.
274 */
275void * __init __alloc_bootmem(unsigned long size, unsigned long align,
276 unsigned long goal)
277{
278 unsigned long limit = -1UL;
279
280 return ___alloc_bootmem(size, align, goal, limit);
281}
282
283/**
284 * __alloc_bootmem_node - allocate boot memory from a specific node
285 * @pgdat: node to allocate from
286 * @size: size of the request in bytes
287 * @align: alignment of the region
288 * @goal: preferred starting address of the region
289 *
290 * The goal is dropped if it can not be satisfied and the allocation will
291 * fall back to memory below @goal.
292 *
293 * Allocation may fall back to any node in the system if the specified node
294 * can not hold the requested memory.
295 *
296 * The function panics if the request can not be satisfied.
297 */
298void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
299 unsigned long align, unsigned long goal)
300{
301 void *ptr;
302
303 if (WARN_ON_ONCE(slab_is_available()))
304 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
305
306 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
307 goal, -1ULL);
308 if (ptr)
309 return ptr;
310
311 return __alloc_memory_core_early(MAX_NUMNODES, size, align,
312 goal, -1ULL);
313}
314
315void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
316 unsigned long align, unsigned long goal)
317{
318#ifdef MAX_DMA32_PFN
319 unsigned long end_pfn;
320
321 if (WARN_ON_ONCE(slab_is_available()))
322 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
323
324 /* update goal according ...MAX_DMA32_PFN */
325 end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
326
327 if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
328 (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
329 void *ptr;
330 unsigned long new_goal;
331
332 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
333 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
334 new_goal, -1ULL);
335 if (ptr)
336 return ptr;
337 }
338#endif
339
340 return __alloc_bootmem_node(pgdat, size, align, goal);
341
342}
343
344#ifdef CONFIG_SPARSEMEM
345/**
346 * alloc_bootmem_section - allocate boot memory from a specific section
347 * @size: size of the request in bytes
348 * @section_nr: sparse map section to allocate from
349 *
350 * Return NULL on failure.
351 */
352void * __init alloc_bootmem_section(unsigned long size,
353 unsigned long section_nr)
354{
355 unsigned long pfn, goal, limit;
356
357 pfn = section_nr_to_pfn(section_nr);
358 goal = pfn << PAGE_SHIFT;
359 limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
360
361 return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
362 SMP_CACHE_BYTES, goal, limit);
363}
364#endif
365
366void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
367 unsigned long align, unsigned long goal)
368{
369 void *ptr;
370
371 if (WARN_ON_ONCE(slab_is_available()))
372 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
373
374 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
375 goal, -1ULL);
376 if (ptr)
377 return ptr;
378
379 return __alloc_bootmem_nopanic(size, align, goal);
380}
381
382#ifndef ARCH_LOW_ADDRESS_LIMIT
383#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
384#endif
385
386/**
387 * __alloc_bootmem_low - allocate low boot memory
388 * @size: size of the request in bytes
389 * @align: alignment of the region
390 * @goal: preferred starting address of the region
391 *
392 * The goal is dropped if it can not be satisfied and the allocation will
393 * fall back to memory below @goal.
394 *
395 * Allocation may happen on any node in the system.
396 *
397 * The function panics if the request can not be satisfied.
398 */
399void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
400 unsigned long goal)
401{
402 return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
403}
404
405/**
406 * __alloc_bootmem_low_node - allocate low boot memory from a specific node
407 * @pgdat: node to allocate from
408 * @size: size of the request in bytes
409 * @align: alignment of the region
410 * @goal: preferred starting address of the region
411 *
412 * The goal is dropped if it can not be satisfied and the allocation will
413 * fall back to memory below @goal.
414 *
415 * Allocation may fall back to any node in the system if the specified node
416 * can not hold the requested memory.
417 *
418 * The function panics if the request can not be satisfied.
419 */
420void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
421 unsigned long align, unsigned long goal)
422{
423 void *ptr;
424
425 if (WARN_ON_ONCE(slab_is_available()))
426 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
427
428 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
429 goal, ARCH_LOW_ADDRESS_LIMIT);
430 if (ptr)
431 return ptr;
432
433 return __alloc_memory_core_early(MAX_NUMNODES, size, align,
434 goal, ARCH_LOW_ADDRESS_LIMIT);
435}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a873e61e312e..bd7625676a64 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3699,13 +3699,45 @@ void __init free_bootmem_with_active_regions(int nid,
3699} 3699}
3700 3700
3701#ifdef CONFIG_HAVE_MEMBLOCK 3701#ifdef CONFIG_HAVE_MEMBLOCK
3702/*
3703 * Basic iterator support. Return the last range of PFNs for a node
3704 * Note: nid == MAX_NUMNODES returns last region regardless of node
3705 */
3706static int __meminit last_active_region_index_in_nid(int nid)
3707{
3708 int i;
3709
3710 for (i = nr_nodemap_entries - 1; i >= 0; i--)
3711 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3712 return i;
3713
3714 return -1;
3715}
3716
3717/*
3718 * Basic iterator support. Return the previous active range of PFNs for a node
3719 * Note: nid == MAX_NUMNODES returns next region regardless of node
3720 */
3721static int __meminit previous_active_region_index_in_nid(int index, int nid)
3722{
3723 for (index = index - 1; index >= 0; index--)
3724 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3725 return index;
3726
3727 return -1;
3728}
3729
3730#define for_each_active_range_index_in_nid_reverse(i, nid) \
3731 for (i = last_active_region_index_in_nid(nid); i != -1; \
3732 i = previous_active_region_index_in_nid(i, nid))
3733
3702u64 __init find_memory_core_early(int nid, u64 size, u64 align, 3734u64 __init find_memory_core_early(int nid, u64 size, u64 align,
3703 u64 goal, u64 limit) 3735 u64 goal, u64 limit)
3704{ 3736{
3705 int i; 3737 int i;
3706 3738
3707 /* Need to go over early_node_map to find out good range for node */ 3739 /* Need to go over early_node_map to find out good range for node */
3708 for_each_active_range_index_in_nid(i, nid) { 3740 for_each_active_range_index_in_nid_reverse(i, nid) {
3709 u64 addr; 3741 u64 addr;
3710 u64 ei_start, ei_last; 3742 u64 ei_start, ei_last;
3711 u64 final_start, final_end; 3743 u64 final_start, final_end;
@@ -3748,34 +3780,6 @@ int __init add_from_early_node_map(struct range *range, int az,
3748 return nr_range; 3780 return nr_range;
3749} 3781}
3750 3782
3751#ifdef CONFIG_NO_BOOTMEM
3752void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3753 u64 goal, u64 limit)
3754{
3755 void *ptr;
3756 u64 addr;
3757
3758 if (limit > memblock.current_limit)
3759 limit = memblock.current_limit;
3760
3761 addr = find_memory_core_early(nid, size, align, goal, limit);
3762
3763 if (addr == MEMBLOCK_ERROR)
3764 return NULL;
3765
3766 ptr = phys_to_virt(addr);
3767 memset(ptr, 0, size);
3768 memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
3769 /*
3770 * The min_count is set to 0 so that bootmem allocated blocks
3771 * are never reported as leaks.
3772 */
3773 kmemleak_alloc(ptr, size, 0, 0);
3774 return ptr;
3775}
3776#endif
3777
3778
3779void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) 3783void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3780{ 3784{
3781 int i; 3785 int i;
@@ -4809,15 +4813,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
4809 dma_reserve = new_dma_reserve; 4813 dma_reserve = new_dma_reserve;
4810} 4814}
4811 4815
4812#ifndef CONFIG_NEED_MULTIPLE_NODES
4813struct pglist_data __refdata contig_page_data = {
4814#ifndef CONFIG_NO_BOOTMEM
4815 .bdata = &bootmem_node_data[0]
4816#endif
4817 };
4818EXPORT_SYMBOL(contig_page_data);
4819#endif
4820
4821void __init free_area_init(unsigned long *zones_size) 4816void __init free_area_init(unsigned long *zones_size)
4822{ 4817{
4823 free_area_init_node(0, zones_size, 4818 free_area_init_node(0, zones_size,
@@ -5376,10 +5371,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
5376 for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { 5371 for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
5377 unsigned long check = pfn + iter; 5372 unsigned long check = pfn + iter;
5378 5373
5379 if (!pfn_valid_within(check)) { 5374 if (!pfn_valid_within(check))
5380 iter++;
5381 continue; 5375 continue;
5382 } 5376
5383 page = pfn_to_page(check); 5377 page = pfn_to_page(check);
5384 if (!page_count(page)) { 5378 if (!page_count(page)) {
5385 if (PageBuddy(page)) 5379 if (PageBuddy(page))
diff --git a/mm/rmap.c b/mm/rmap.c
index f21f4a1d6a1c..941bf82e8961 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -497,41 +497,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
497 struct mm_struct *mm = vma->vm_mm; 497 struct mm_struct *mm = vma->vm_mm;
498 int referenced = 0; 498 int referenced = 0;
499 499
500 /*
501 * Don't want to elevate referenced for mlocked page that gets this far,
502 * in order that it progresses to try_to_unmap and is moved to the
503 * unevictable list.
504 */
505 if (vma->vm_flags & VM_LOCKED) {
506 *mapcount = 0; /* break early from loop */
507 *vm_flags |= VM_LOCKED;
508 goto out;
509 }
510
511 /* Pretend the page is referenced if the task has the
512 swap token and is in the middle of a page fault. */
513 if (mm != current->mm && has_swap_token(mm) &&
514 rwsem_is_locked(&mm->mmap_sem))
515 referenced++;
516
517 if (unlikely(PageTransHuge(page))) { 500 if (unlikely(PageTransHuge(page))) {
518 pmd_t *pmd; 501 pmd_t *pmd;
519 502
520 spin_lock(&mm->page_table_lock); 503 spin_lock(&mm->page_table_lock);
504 /*
505 * rmap might return false positives; we must filter
506 * these out using page_check_address_pmd().
507 */
521 pmd = page_check_address_pmd(page, mm, address, 508 pmd = page_check_address_pmd(page, mm, address,
522 PAGE_CHECK_ADDRESS_PMD_FLAG); 509 PAGE_CHECK_ADDRESS_PMD_FLAG);
523 if (pmd && !pmd_trans_splitting(*pmd) && 510 if (!pmd) {
524 pmdp_clear_flush_young_notify(vma, address, pmd)) 511 spin_unlock(&mm->page_table_lock);
512 goto out;
513 }
514
515 if (vma->vm_flags & VM_LOCKED) {
516 spin_unlock(&mm->page_table_lock);
517 *mapcount = 0; /* break early from loop */
518 *vm_flags |= VM_LOCKED;
519 goto out;
520 }
521
522 /* go ahead even if the pmd is pmd_trans_splitting() */
523 if (pmdp_clear_flush_young_notify(vma, address, pmd))
525 referenced++; 524 referenced++;
526 spin_unlock(&mm->page_table_lock); 525 spin_unlock(&mm->page_table_lock);
527 } else { 526 } else {
528 pte_t *pte; 527 pte_t *pte;
529 spinlock_t *ptl; 528 spinlock_t *ptl;
530 529
530 /*
531 * rmap might return false positives; we must filter
532 * these out using page_check_address().
533 */
531 pte = page_check_address(page, mm, address, &ptl, 0); 534 pte = page_check_address(page, mm, address, &ptl, 0);
532 if (!pte) 535 if (!pte)
533 goto out; 536 goto out;
534 537
538 if (vma->vm_flags & VM_LOCKED) {
539 pte_unmap_unlock(pte, ptl);
540 *mapcount = 0; /* break early from loop */
541 *vm_flags |= VM_LOCKED;
542 goto out;
543 }
544
535 if (ptep_clear_flush_young_notify(vma, address, pte)) { 545 if (ptep_clear_flush_young_notify(vma, address, pte)) {
536 /* 546 /*
537 * Don't treat a reference through a sequentially read 547 * Don't treat a reference through a sequentially read
@@ -546,6 +556,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
546 pte_unmap_unlock(pte, ptl); 556 pte_unmap_unlock(pte, ptl);
547 } 557 }
548 558
559 /* Pretend the page is referenced if the task has the
560 swap token and is in the middle of a page fault. */
561 if (mm != current->mm && has_swap_token(mm) &&
562 rwsem_is_locked(&mm->mmap_sem))
563 referenced++;
564
549 (*mapcount)--; 565 (*mapcount)--;
550 566
551 if (referenced) 567 if (referenced)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c990602..3437b65d6d6e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2144,8 +2144,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
2144{ 2144{
2145 struct inode *inode = dentry->d_inode; 2145 struct inode *inode = dentry->d_inode;
2146 2146
2147 if (*len < 3) 2147 if (*len < 3) {
2148 *len = 3;
2148 return 255; 2149 return 255;
2150 }
2149 2151
2150 if (inode_unhashed(inode)) { 2152 if (inode_unhashed(inode)) {
2151 /* Unfortunately insert_inode_hash is not idempotent, 2153 /* Unfortunately insert_inode_hash is not idempotent,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 07a458d72fa8..0341c5700e34 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1940,7 +1940,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1940 1940
1941 error = -EINVAL; 1941 error = -EINVAL;
1942 if (S_ISBLK(inode->i_mode)) { 1942 if (S_ISBLK(inode->i_mode)) {
1943 bdev = I_BDEV(inode); 1943 bdev = bdgrab(I_BDEV(inode));
1944 error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, 1944 error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1945 sys_swapon); 1945 sys_swapon);
1946 if (error < 0) { 1946 if (error < 0) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 49feb46e77b8..d64296be00d3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -225,6 +225,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
225 next = start; 225 next = start;
226 while (next <= end && 226 while (next <= end &&
227 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 227 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
228 mem_cgroup_uncharge_start();
228 for (i = 0; i < pagevec_count(&pvec); i++) { 229 for (i = 0; i < pagevec_count(&pvec); i++) {
229 struct page *page = pvec.pages[i]; 230 struct page *page = pvec.pages[i];
230 pgoff_t page_index = page->index; 231 pgoff_t page_index = page->index;
@@ -247,6 +248,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
247 unlock_page(page); 248 unlock_page(page);
248 } 249 }
249 pagevec_release(&pvec); 250 pagevec_release(&pvec);
251 mem_cgroup_uncharge_end();
250 cond_resched(); 252 cond_resched();
251 } 253 }
252 254
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 17497d0cd8b9..6771ea70bfe7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1841,16 +1841,28 @@ static inline bool should_continue_reclaim(struct zone *zone,
1841 if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) 1841 if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
1842 return false; 1842 return false;
1843 1843
1844 /* 1844 /* Consider stopping depending on scan and reclaim activity */
1845 * If we failed to reclaim and have scanned the full list, stop. 1845 if (sc->gfp_mask & __GFP_REPEAT) {
1846 * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far 1846 /*
1847 * faster but obviously would be less likely to succeed 1847 * For __GFP_REPEAT allocations, stop reclaiming if the
1848 * allocation. If this is desirable, use GFP_REPEAT to decide 1848 * full LRU list has been scanned and we are still failing
1849 * if both reclaimed and scanned should be checked or just 1849 * to reclaim pages. This full LRU scan is potentially
1850 * reclaimed 1850 * expensive but a __GFP_REPEAT caller really wants to succeed
1851 */ 1851 */
1852 if (!nr_reclaimed && !nr_scanned) 1852 if (!nr_reclaimed && !nr_scanned)
1853 return false; 1853 return false;
1854 } else {
1855 /*
1856 * For non-__GFP_REPEAT allocations which can presumably
1857 * fail without consequence, stop if we failed to reclaim
1858 * any pages from the last SWAP_CLUSTER_MAX number of
1859 * pages that were scanned. This will return to the
1860 * caller faster at the risk reclaim/compaction and
1861 * the resulting allocation attempt fails
1862 */
1863 if (!nr_reclaimed)
1864 return false;
1865 }
1854 1866
1855 /* 1867 /*
1856 * If we have not reclaimed enough pages for compaction and the 1868 * If we have not reclaimed enough pages for compaction and the
diff --git a/net/Makefile b/net/Makefile
index a3330ebe2c53..a51d9465e628 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
19obj-$(CONFIG_INET) += ipv4/ 19obj-$(CONFIG_INET) += ipv4/
20obj-$(CONFIG_XFRM) += xfrm/ 20obj-$(CONFIG_XFRM) += xfrm/
21obj-$(CONFIG_UNIX) += unix/ 21obj-$(CONFIG_UNIX) += unix/
22ifneq ($(CONFIG_IPV6),) 22obj-$(CONFIG_NET) += ipv6/
23obj-y += ipv6/
24endif
25obj-$(CONFIG_PACKET) += packet/ 23obj-$(CONFIG_PACKET) += packet/
26obj-$(CONFIG_NET_KEY) += key/ 24obj-$(CONFIG_NET_KEY) += key/
27obj-$(CONFIG_BRIDGE) += bridge/ 25obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 2575c2db6404..d7b9af4703d0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -727,7 +727,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
727 break; 727 break;
728 } 728 }
729 729
730 tty_unlock();
730 schedule(); 731 schedule();
732 tty_lock();
731 } 733 }
732 set_current_state(TASK_RUNNING); 734 set_current_state(TASK_RUNNING);
733 remove_wait_queue(&dev->wait, &wait); 735 remove_wait_queue(&dev->wait, &wait);
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 9190ae462cb4..6dee7bf648a9 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -6,6 +6,7 @@ config BRIDGE
6 tristate "802.1d Ethernet Bridging" 6 tristate "802.1d Ethernet Bridging"
7 select LLC 7 select LLC
8 select STP 8 select STP
9 depends on IPV6 || IPV6=n
9 ---help--- 10 ---help---
10 If you say Y here, then your Linux box will be able to act as an 11 If you say Y here, then your Linux box will be able to act as an
11 Ethernet bridge, which means that the different Ethernet segments it 12 Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 09d5c0987925..030a002ff8ee 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,10 +37,9 @@
37 rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) 37 rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
38 38
39#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 39#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
40static inline int ipv6_is_local_multicast(const struct in6_addr *addr) 40static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
41{ 41{
42 if (ipv6_addr_is_multicast(addr) && 42 if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
43 IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL)
44 return 1; 43 return 1;
45 return 0; 44 return 0;
46} 45}
@@ -435,7 +434,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
435 eth = eth_hdr(skb); 434 eth = eth_hdr(skb);
436 435
437 memcpy(eth->h_source, br->dev->dev_addr, 6); 436 memcpy(eth->h_source, br->dev->dev_addr, 6);
438 ipv6_eth_mc_map(group, eth->h_dest);
439 eth->h_proto = htons(ETH_P_IPV6); 437 eth->h_proto = htons(ETH_P_IPV6);
440 skb_put(skb, sizeof(*eth)); 438 skb_put(skb, sizeof(*eth));
441 439
@@ -447,8 +445,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
447 ip6h->payload_len = htons(8 + sizeof(*mldq)); 445 ip6h->payload_len = htons(8 + sizeof(*mldq));
448 ip6h->nexthdr = IPPROTO_HOPOPTS; 446 ip6h->nexthdr = IPPROTO_HOPOPTS;
449 ip6h->hop_limit = 1; 447 ip6h->hop_limit = 1;
450 ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); 448 ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
449 &ip6h->saddr);
451 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); 450 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
451 ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
452 452
453 hopopt = (u8 *)(ip6h + 1); 453 hopopt = (u8 *)(ip6h + 1);
454 hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ 454 hopopt[0] = IPPROTO_ICMPV6; /* next hdr */
@@ -780,11 +780,11 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
780{ 780{
781 struct br_ip br_group; 781 struct br_ip br_group;
782 782
783 if (ipv6_is_local_multicast(group)) 783 if (!ipv6_is_transient_multicast(group))
784 return 0; 784 return 0;
785 785
786 ipv6_addr_copy(&br_group.u.ip6, group); 786 ipv6_addr_copy(&br_group.u.ip6, group);
787 br_group.proto = htons(ETH_P_IP); 787 br_group.proto = htons(ETH_P_IPV6);
788 788
789 return br_multicast_add_group(br, port, &br_group); 789 return br_multicast_add_group(br, port, &br_group);
790} 790}
@@ -1013,18 +1013,19 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
1013 1013
1014 nsrcs = skb_header_pointer(skb, 1014 nsrcs = skb_header_pointer(skb,
1015 len + offsetof(struct mld2_grec, 1015 len + offsetof(struct mld2_grec,
1016 grec_mca), 1016 grec_nsrcs),
1017 sizeof(_nsrcs), &_nsrcs); 1017 sizeof(_nsrcs), &_nsrcs);
1018 if (!nsrcs) 1018 if (!nsrcs)
1019 return -EINVAL; 1019 return -EINVAL;
1020 1020
1021 if (!pskb_may_pull(skb, 1021 if (!pskb_may_pull(skb,
1022 len + sizeof(*grec) + 1022 len + sizeof(*grec) +
1023 sizeof(struct in6_addr) * (*nsrcs))) 1023 sizeof(struct in6_addr) * ntohs(*nsrcs)))
1024 return -EINVAL; 1024 return -EINVAL;
1025 1025
1026 grec = (struct mld2_grec *)(skb->data + len); 1026 grec = (struct mld2_grec *)(skb->data + len);
1027 len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs); 1027 len += sizeof(*grec) +
1028 sizeof(struct in6_addr) * ntohs(*nsrcs);
1028 1029
1029 /* We treat these as MLDv1 reports for now. */ 1030 /* We treat these as MLDv1 reports for now. */
1030 switch (grec->grec_type) { 1031 switch (grec->grec_type) {
@@ -1340,7 +1341,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
1340{ 1341{
1341 struct br_ip br_group; 1342 struct br_ip br_group;
1342 1343
1343 if (ipv6_is_local_multicast(group)) 1344 if (!ipv6_is_transient_multicast(group))
1344 return; 1345 return;
1345 1346
1346 ipv6_addr_copy(&br_group.u.ip6, group); 1347 ipv6_addr_copy(&br_group.u.ip6, group);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 35b36b86d762..05f357828a2f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -336,7 +336,6 @@ static void reset_connection(struct ceph_connection *con)
336 ceph_msg_put(con->out_msg); 336 ceph_msg_put(con->out_msg);
337 con->out_msg = NULL; 337 con->out_msg = NULL;
338 } 338 }
339 con->out_keepalive_pending = false;
340 con->in_seq = 0; 339 con->in_seq = 0;
341 con->in_seq_acked = 0; 340 con->in_seq_acked = 0;
342} 341}
@@ -1248,8 +1247,6 @@ static int process_connect(struct ceph_connection *con)
1248 con->auth_retry); 1247 con->auth_retry);
1249 if (con->auth_retry == 2) { 1248 if (con->auth_retry == 2) {
1250 con->error_msg = "connect authorization failure"; 1249 con->error_msg = "connect authorization failure";
1251 reset_connection(con);
1252 set_bit(CLOSED, &con->state);
1253 return -1; 1250 return -1;
1254 } 1251 }
1255 con->auth_retry = 1; 1252 con->auth_retry = 1;
@@ -1715,14 +1712,6 @@ more:
1715 1712
1716 /* open the socket first? */ 1713 /* open the socket first? */
1717 if (con->sock == NULL) { 1714 if (con->sock == NULL) {
1718 /*
1719 * if we were STANDBY and are reconnecting _this_
1720 * connection, bump connect_seq now. Always bump
1721 * global_seq.
1722 */
1723 if (test_and_clear_bit(STANDBY, &con->state))
1724 con->connect_seq++;
1725
1726 prepare_write_banner(msgr, con); 1715 prepare_write_banner(msgr, con);
1727 prepare_write_connect(msgr, con, 1); 1716 prepare_write_connect(msgr, con, 1);
1728 prepare_read_banner(con); 1717 prepare_read_banner(con);
@@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work)
1951 work.work); 1940 work.work);
1952 1941
1953 mutex_lock(&con->mutex); 1942 mutex_lock(&con->mutex);
1943 if (test_and_clear_bit(BACKOFF, &con->state)) {
1944 dout("con_work %p backing off\n", con);
1945 if (queue_delayed_work(ceph_msgr_wq, &con->work,
1946 round_jiffies_relative(con->delay))) {
1947 dout("con_work %p backoff %lu\n", con, con->delay);
1948 mutex_unlock(&con->mutex);
1949 return;
1950 } else {
1951 con->ops->put(con);
1952 dout("con_work %p FAILED to back off %lu\n", con,
1953 con->delay);
1954 }
1955 }
1954 1956
1957 if (test_bit(STANDBY, &con->state)) {
1958 dout("con_work %p STANDBY\n", con);
1959 goto done;
1960 }
1955 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ 1961 if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
1956 dout("con_work CLOSED\n"); 1962 dout("con_work CLOSED\n");
1957 con_close_socket(con); 1963 con_close_socket(con);
@@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con)
2008 /* Requeue anything that hasn't been acked */ 2014 /* Requeue anything that hasn't been acked */
2009 list_splice_init(&con->out_sent, &con->out_queue); 2015 list_splice_init(&con->out_sent, &con->out_queue);
2010 2016
2011 /* If there are no messages in the queue, place the connection 2017 /* If there are no messages queued or keepalive pending, place
2012 * in a STANDBY state (i.e., don't try to reconnect just yet). */ 2018 * the connection in a STANDBY state */
2013 if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { 2019 if (list_empty(&con->out_queue) &&
2014 dout("fault setting STANDBY\n"); 2020 !test_bit(KEEPALIVE_PENDING, &con->state)) {
2021 dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
2022 clear_bit(WRITE_PENDING, &con->state);
2015 set_bit(STANDBY, &con->state); 2023 set_bit(STANDBY, &con->state);
2016 } else { 2024 } else {
2017 /* retry after a delay. */ 2025 /* retry after a delay. */
@@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con)
2019 con->delay = BASE_DELAY_INTERVAL; 2027 con->delay = BASE_DELAY_INTERVAL;
2020 else if (con->delay < MAX_DELAY_INTERVAL) 2028 else if (con->delay < MAX_DELAY_INTERVAL)
2021 con->delay *= 2; 2029 con->delay *= 2;
2022 dout("fault queueing %p delay %lu\n", con, con->delay);
2023 con->ops->get(con); 2030 con->ops->get(con);
2024 if (queue_delayed_work(ceph_msgr_wq, &con->work, 2031 if (queue_delayed_work(ceph_msgr_wq, &con->work,
2025 round_jiffies_relative(con->delay)) == 0) 2032 round_jiffies_relative(con->delay))) {
2033 dout("fault queued %p delay %lu\n", con, con->delay);
2034 } else {
2026 con->ops->put(con); 2035 con->ops->put(con);
2036 dout("fault failed to queue %p delay %lu, backoff\n",
2037 con, con->delay);
2038 /*
2039 * In many cases we see a socket state change
2040 * while con_work is running and end up
2041 * queuing (non-delayed) work, such that we
2042 * can't backoff with a delay. Set a flag so
2043 * that when con_work restarts we schedule the
2044 * delay then.
2045 */
2046 set_bit(BACKOFF, &con->state);
2047 }
2027 } 2048 }
2028 2049
2029out_unlock: 2050out_unlock:
@@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
2094} 2115}
2095EXPORT_SYMBOL(ceph_messenger_destroy); 2116EXPORT_SYMBOL(ceph_messenger_destroy);
2096 2117
2118static void clear_standby(struct ceph_connection *con)
2119{
2120 /* come back from STANDBY? */
2121 if (test_and_clear_bit(STANDBY, &con->state)) {
2122 mutex_lock(&con->mutex);
2123 dout("clear_standby %p and ++connect_seq\n", con);
2124 con->connect_seq++;
2125 WARN_ON(test_bit(WRITE_PENDING, &con->state));
2126 WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
2127 mutex_unlock(&con->mutex);
2128 }
2129}
2130
2097/* 2131/*
2098 * Queue up an outgoing message on the given connection. 2132 * Queue up an outgoing message on the given connection.
2099 */ 2133 */
@@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
2126 2160
2127 /* if there wasn't anything waiting to send before, queue 2161 /* if there wasn't anything waiting to send before, queue
2128 * new work */ 2162 * new work */
2163 clear_standby(con);
2129 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2164 if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2130 queue_con(con); 2165 queue_con(con);
2131} 2166}
@@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
2191 */ 2226 */
2192void ceph_con_keepalive(struct ceph_connection *con) 2227void ceph_con_keepalive(struct ceph_connection *con)
2193{ 2228{
2229 dout("con_keepalive %p\n", con);
2230 clear_standby(con);
2194 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && 2231 if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
2195 test_and_set_bit(WRITE_PENDING, &con->state) == 0) 2232 test_and_set_bit(WRITE_PENDING, &con->state) == 0)
2196 queue_con(con); 2233 queue_con(con);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a040e64c69f..cd9c21df87d1 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data,
16 int num_pages, bool write_page) 16 int num_pages, bool write_page)
17{ 17{
18 struct page **pages; 18 struct page **pages;
19 int rc; 19 int got = 0;
20 int rc = 0;
20 21
21 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); 22 pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
22 if (!pages) 23 if (!pages)
23 return ERR_PTR(-ENOMEM); 24 return ERR_PTR(-ENOMEM);
24 25
25 down_read(&current->mm->mmap_sem); 26 down_read(&current->mm->mmap_sem);
26 rc = get_user_pages(current, current->mm, (unsigned long)data, 27 while (got < num_pages) {
27 num_pages, write_page, 0, pages, NULL); 28 rc = get_user_pages(current, current->mm,
29 (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
30 num_pages - got, write_page, 0, pages + got, NULL);
31 if (rc < 0)
32 break;
33 BUG_ON(rc == 0);
34 got += rc;
35 }
28 up_read(&current->mm->mmap_sem); 36 up_read(&current->mm->mmap_sem);
29 if (rc < num_pages) 37 if (rc < 0)
30 goto fail; 38 goto fail;
31 return pages; 39 return pages;
32 40
33fail: 41fail:
34 ceph_put_page_vector(pages, rc > 0 ? rc : 0, false); 42 ceph_put_page_vector(pages, got, false);
35 return ERR_PTR(rc); 43 return ERR_PTR(rc);
36} 44}
37EXPORT_SYMBOL(ceph_get_direct_page_vector); 45EXPORT_SYMBOL(ceph_get_direct_page_vector);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc2..6561021d22d1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1114,13 +1114,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
1114void dev_load(struct net *net, const char *name) 1114void dev_load(struct net *net, const char *name)
1115{ 1115{
1116 struct net_device *dev; 1116 struct net_device *dev;
1117 int no_module;
1117 1118
1118 rcu_read_lock(); 1119 rcu_read_lock();
1119 dev = dev_get_by_name_rcu(net, name); 1120 dev = dev_get_by_name_rcu(net, name);
1120 rcu_read_unlock(); 1121 rcu_read_unlock();
1121 1122
1122 if (!dev && capable(CAP_NET_ADMIN)) 1123 no_module = !dev;
1123 request_module("%s", name); 1124 if (no_module && capable(CAP_NET_ADMIN))
1125 no_module = request_module("netdev-%s", name);
1126 if (no_module && capable(CAP_SYS_MODULE)) {
1127 if (!request_module("%s", name))
1128 pr_err("Loading kernel module for a network device "
1129"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
1130"instead\n", name);
1131 }
1124} 1132}
1125EXPORT_SYMBOL(dev_load); 1133EXPORT_SYMBOL(dev_load);
1126 1134
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 508f9c18992f..133fd22ea287 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
144 144
145 list_for_each_entry(ha, &from_list->list, list) { 145 list_for_each_entry(ha, &from_list->list, list) {
146 type = addr_type ? addr_type : ha->type; 146 type = addr_type ? addr_type : ha->type;
147 __hw_addr_del(to_list, ha->addr, addr_len, addr_type); 147 __hw_addr_del(to_list, ha->addr, addr_len, type);
148 } 148 }
149} 149}
150EXPORT_SYMBOL(__hw_addr_del_multiple); 150EXPORT_SYMBOL(__hw_addr_del_multiple);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461f..b5bada92f637 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3321,7 +3321,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
3321 pkt_dev->started_at); 3321 pkt_dev->started_at);
3322 ktime_t idle = ns_to_ktime(pkt_dev->idle_acc); 3322 ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
3323 3323
3324 p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n", 3324 p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
3325 (unsigned long long)ktime_to_us(elapsed), 3325 (unsigned long long)ktime_to_us(elapsed),
3326 (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)), 3326 (unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
3327 (unsigned long long)ktime_to_us(idle), 3327 (unsigned long long)ktime_to_us(idle),
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe454450801..4c1ef026d695 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
95 int fd = fdp[i]; 95 int fd = fdp[i];
96 struct file *file; 96 struct file *file;
97 97
98 if (fd < 0 || !(file = fget(fd))) 98 if (fd < 0 || !(file = fget_raw(fd)))
99 return -EBADF; 99 return -EBADF;
100 *fpp++ = file; 100 *fpp++ = file;
101 fpl->count++; 101 fpl->count++;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index d5074a567289..c44348adba3b 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1193,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
1193 goto err; 1193 goto err;
1194 } 1194 }
1195 1195
1196 if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) { 1196 if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
1197 struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); 1197 struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
1198 err = ops->ieee_setpfc(netdev, pfc); 1198 err = ops->ieee_setpfc(netdev, pfc);
1199 if (err) 1199 if (err)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 8cde009e8b85..4222e7a654b0 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
614 /* Caller (dccp_v4_do_rcv) will send Reset */ 614 /* Caller (dccp_v4_do_rcv) will send Reset */
615 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; 615 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
616 return 1; 616 return 1;
617 } else if (sk->sk_state == DCCP_CLOSED) {
618 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
619 return 1;
617 } 620 }
618 621
619 if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { 622 if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
@@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
668 } 671 }
669 672
670 switch (sk->sk_state) { 673 switch (sk->sk_state) {
671 case DCCP_CLOSED:
672 dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
673 return 1;
674
675 case DCCP_REQUESTING: 674 case DCCP_REQUESTING:
676 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); 675 queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
677 if (queued >= 0) 676 if (queued >= 0)
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 739435a6af39..cfa7a5e1c5c9 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
67 size_t result_len = 0; 67 size_t result_len = 0;
68 const char *data = _data, *end, *opt; 68 const char *data = _data, *end, *opt;
69 69
70 kenter("%%%d,%s,'%s',%zu", 70 kenter("%%%d,%s,'%*.*s',%zu",
71 key->serial, key->description, data, datalen); 71 key->serial, key->description,
72 (int)datalen, (int)datalen, data, datalen);
72 73
73 if (datalen <= 1 || !data || data[datalen - 1] != '\0') 74 if (datalen <= 1 || !data || data[datalen - 1] != '\0')
74 return -EINVAL; 75 return -EINVAL;
@@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
217 seq_printf(m, ": %u", key->datalen); 218 seq_printf(m, ": %u", key->datalen);
218} 219}
219 220
221/*
222 * read the DNS data
223 * - the key's semaphore is read-locked
224 */
225static long dns_resolver_read(const struct key *key,
226 char __user *buffer, size_t buflen)
227{
228 if (key->type_data.x[0])
229 return key->type_data.x[0];
230
231 return user_read(key, buffer, buflen);
232}
233
220struct key_type key_type_dns_resolver = { 234struct key_type key_type_dns_resolver = {
221 .name = "dns_resolver", 235 .name = "dns_resolver",
222 .instantiate = dns_resolver_instantiate, 236 .instantiate = dns_resolver_instantiate,
@@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = {
224 .revoke = user_revoke, 238 .revoke = user_revoke,
225 .destroy = user_destroy, 239 .destroy = user_destroy,
226 .describe = dns_resolver_describe, 240 .describe = dns_resolver_describe,
227 .read = user_read, 241 .read = dns_resolver_read,
228}; 242};
229 243
230static int __init init_dns_resolver(void) 244static int __init init_dns_resolver(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df4616fce929..036652c8166d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -670,7 +670,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
670 ifap = &ifa->ifa_next) { 670 ifap = &ifa->ifa_next) {
671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) && 671 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
672 sin_orig.sin_addr.s_addr == 672 sin_orig.sin_addr.s_addr ==
673 ifa->ifa_address) { 673 ifa->ifa_local) {
674 break; /* found */ 674 break; /* found */
675 } 675 }
676 } 676 }
@@ -1040,8 +1040,8 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev,
1040 return; 1040 return;
1041 1041
1042 arp_send(ARPOP_REQUEST, ETH_P_ARP, 1042 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1043 ifa->ifa_address, dev, 1043 ifa->ifa_local, dev,
1044 ifa->ifa_address, NULL, 1044 ifa->ifa_local, NULL,
1045 dev->dev_addr, NULL); 1045 dev->dev_addr, NULL);
1046} 1046}
1047 1047
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c5af909cf701..3c8dfa16614d 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -505,7 +505,9 @@ restart:
505 } 505 }
506 506
507 rcu_read_unlock(); 507 rcu_read_unlock();
508 local_bh_disable();
508 inet_twsk_deschedule(tw, twdr); 509 inet_twsk_deschedule(tw, twdr);
510 local_bh_enable();
509 inet_twsk_put(tw); 511 inet_twsk_put(tw);
510 goto restart_rcu; 512 goto restart_rcu;
511 } 513 }
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 6613edfac28c..d1d0e2c256fc 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1765,4 +1765,4 @@ module_exit(ipgre_fini);
1765MODULE_LICENSE("GPL"); 1765MODULE_LICENSE("GPL");
1766MODULE_ALIAS_RTNL_LINK("gre"); 1766MODULE_ALIAS_RTNL_LINK("gre");
1767MODULE_ALIAS_RTNL_LINK("gretap"); 1767MODULE_ALIAS_RTNL_LINK("gretap");
1768MODULE_ALIAS("gre0"); 1768MODULE_ALIAS_NETDEV("gre0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 988f52fba54a..a5f58e7cbb26 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -913,4 +913,4 @@ static void __exit ipip_fini(void)
913module_init(ipip_init); 913module_init(ipip_init);
914module_exit(ipip_fini); 914module_exit(ipip_fini);
915MODULE_LICENSE("GPL"); 915MODULE_LICENSE("GPL");
916MODULE_ALIAS("tunl0"); 916MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb7f82ebf4a3..65f6c0406245 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
1222 } 1222 }
1223 1223
1224 /* D-SACK for already forgotten data... Do dumb counting. */ 1224 /* D-SACK for already forgotten data... Do dumb counting. */
1225 if (dup_sack && 1225 if (dup_sack && tp->undo_marker && tp->undo_retrans &&
1226 !after(end_seq_0, prior_snd_una) && 1226 !after(end_seq_0, prior_snd_una) &&
1227 after(end_seq_0, tp->undo_marker)) 1227 after(end_seq_0, tp->undo_marker))
1228 tp->undo_retrans--; 1228 tp->undo_retrans--;
@@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
1299 1299
1300 /* Account D-SACK for retransmitted packet. */ 1300 /* Account D-SACK for retransmitted packet. */
1301 if (dup_sack && (sacked & TCPCB_RETRANS)) { 1301 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1302 if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) 1302 if (tp->undo_marker && tp->undo_retrans &&
1303 after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
1303 tp->undo_retrans--; 1304 tp->undo_retrans--;
1304 if (sacked & TCPCB_SACKED_ACKED) 1305 if (sacked & TCPCB_SACKED_ACKED)
1305 state->reord = min(fack_count, state->reord); 1306 state->reord = min(fack_count, state->reord);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 406f320336e6..dfa5beb0c1c8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2162 if (!tp->retrans_stamp) 2162 if (!tp->retrans_stamp)
2163 tp->retrans_stamp = TCP_SKB_CB(skb)->when; 2163 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2164 2164
2165 tp->undo_retrans++; 2165 tp->undo_retrans += tcp_skb_pcount(skb);
2166 2166
2167 /* snd_nxt is stored to detect loss of retransmitted segment, 2167 /* snd_nxt is stored to detect loss of retransmitted segment,
2168 * see tcp_input.c tcp_sacktag_write_queue(). 2168 * see tcp_input.c tcp_sacktag_write_queue().
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4f4483e697bd..e528a42a52be 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
57MODULE_AUTHOR("Ville Nuorvala"); 57MODULE_AUTHOR("Ville Nuorvala");
58MODULE_DESCRIPTION("IPv6 tunneling device"); 58MODULE_DESCRIPTION("IPv6 tunneling device");
59MODULE_LICENSE("GPL"); 59MODULE_LICENSE("GPL");
60MODULE_ALIAS_NETDEV("ip6tnl0");
60 61
61#ifdef IP6_TNL_DEBUG 62#ifdef IP6_TNL_DEBUG
62#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) 63#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 09c88891a753..de338037a736 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -410,7 +410,7 @@ fallback:
410 if (p != NULL) { 410 if (p != NULL) {
411 sb_add(m, "%02x", *p++); 411 sb_add(m, "%02x", *p++);
412 for (i = 1; i < len; i++) 412 for (i = 1; i < len; i++)
413 sb_add(m, ":%02x", p[i]); 413 sb_add(m, ":%02x", *p++);
414 } 414 }
415 sb_add(m, " "); 415 sb_add(m, " ");
416 416
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a998db6e7895..e7db7014e89f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -739,8 +739,10 @@ restart:
739 739
740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
742 else 742 else if (!(rt->dst.flags & DST_HOST))
743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
744 else
745 goto out2;
744 746
745 dst_release(&rt->dst); 747 dst_release(&rt->dst);
746 rt = nrt ? : net->ipv6.ip6_null_entry; 748 rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -2557,14 +2559,16 @@ static
2557int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2559int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2558 void __user *buffer, size_t *lenp, loff_t *ppos) 2560 void __user *buffer, size_t *lenp, loff_t *ppos)
2559{ 2561{
2560 struct net *net = current->nsproxy->net_ns; 2562 struct net *net;
2561 int delay = net->ipv6.sysctl.flush_delay; 2563 int delay;
2562 if (write) { 2564 if (!write)
2563 proc_dointvec(ctl, write, buffer, lenp, ppos);
2564 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2565 return 0;
2566 } else
2567 return -EINVAL; 2565 return -EINVAL;
2566
2567 net = (struct net *)ctl->extra1;
2568 delay = net->ipv6.sysctl.flush_delay;
2569 proc_dointvec(ctl, write, buffer, lenp, ppos);
2570 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2571 return 0;
2568} 2572}
2569 2573
2570ctl_table ipv6_route_table_template[] = { 2574ctl_table ipv6_route_table_template[] = {
@@ -2651,6 +2655,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2651 2655
2652 if (table) { 2656 if (table) {
2653 table[0].data = &net->ipv6.sysctl.flush_delay; 2657 table[0].data = &net->ipv6.sysctl.flush_delay;
2658 table[0].extra1 = net;
2654 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2659 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2655 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2660 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2656 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2661 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a547..d2c16e10f650 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1290,4 +1290,4 @@ static int __init sit_init(void)
1290module_init(sit_init); 1290module_init(sit_init);
1291module_exit(sit_cleanup); 1291module_exit(sit_cleanup);
1292MODULE_LICENSE("GPL"); 1292MODULE_LICENSE("GPL");
1293MODULE_ALIAS("sit0"); 1293MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8acba456744e..7a10a8d1b2d0 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1229,6 +1229,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
1229 } 1229 }
1230 mutex_unlock(&local->iflist_mtx); 1230 mutex_unlock(&local->iflist_mtx);
1231 unregister_netdevice_many(&unreg_list); 1231 unregister_netdevice_many(&unreg_list);
1232 list_del(&unreg_list);
1232} 1233}
1233 1234
1234static u32 ieee80211_idle_off(struct ieee80211_local *local, 1235static u32 ieee80211_idle_off(struct ieee80211_local *local,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 45fbb9e33746..c9ceb4d57ab0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1033,6 +1033,12 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1033 if (is_multicast_ether_addr(hdr->addr1)) 1033 if (is_multicast_ether_addr(hdr->addr1))
1034 return; 1034 return;
1035 1035
1036 /*
1037 * In case we receive frames after disassociation.
1038 */
1039 if (!sdata->u.mgd.associated)
1040 return;
1041
1036 ieee80211_sta_reset_conn_monitor(sdata); 1042 ieee80211_sta_reset_conn_monitor(sdata);
1037} 1043}
1038 1044
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5101ab..ba98e1308f3c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -808,9 +808,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
808 dest->u_threshold = udest->u_threshold; 808 dest->u_threshold = udest->u_threshold;
809 dest->l_threshold = udest->l_threshold; 809 dest->l_threshold = udest->l_threshold;
810 810
811 spin_lock(&dest->dst_lock); 811 spin_lock_bh(&dest->dst_lock);
812 ip_vs_dst_reset(dest); 812 ip_vs_dst_reset(dest);
813 spin_unlock(&dest->dst_lock); 813 spin_unlock_bh(&dest->dst_lock);
814 814
815 if (add) 815 if (add)
816 ip_vs_new_estimator(&dest->stats); 816 ip_vs_new_estimator(&dest->stats);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393eab88e..91816998ed86 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister);
85 85
86int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) 86int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
87{ 87{
88 if (pf >= ARRAY_SIZE(nf_loggers))
89 return -EINVAL;
88 mutex_lock(&nf_log_mutex); 90 mutex_lock(&nf_log_mutex);
89 if (__find_logger(pf, logger->name) == NULL) { 91 if (__find_logger(pf, logger->name) == NULL) {
90 mutex_unlock(&nf_log_mutex); 92 mutex_unlock(&nf_log_mutex);
@@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf);
98 100
99void nf_log_unbind_pf(u_int8_t pf) 101void nf_log_unbind_pf(u_int8_t pf)
100{ 102{
103 if (pf >= ARRAY_SIZE(nf_loggers))
104 return;
101 mutex_lock(&nf_log_mutex); 105 mutex_lock(&nf_log_mutex);
102 rcu_assign_pointer(nf_loggers[pf], NULL); 106 rcu_assign_pointer(nf_loggers[pf], NULL);
103 mutex_unlock(&nf_log_mutex); 107 mutex_unlock(&nf_log_mutex);
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index 4d87befb04c0..474d621cbc2e 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -28,26 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb)
28 skb->destructor = NULL; 28 skb->destructor = NULL;
29 29
30 if (sk) 30 if (sk)
31 nf_tproxy_put_sock(sk); 31 sock_put(sk);
32} 32}
33 33
34/* consumes sk */ 34/* consumes sk */
35int 35void
36nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 36nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
37{ 37{
38 bool transparent = (sk->sk_state == TCP_TIME_WAIT) ? 38 /* assigning tw sockets complicates things; most
39 inet_twsk(sk)->tw_transparent : 39 * skb->sk->X checks would have to test sk->sk_state first */
40 inet_sk(sk)->transparent; 40 if (sk->sk_state == TCP_TIME_WAIT) {
41 41 inet_twsk_put(inet_twsk(sk));
42 if (transparent) { 42 return;
43 skb_orphan(skb); 43 }
44 skb->sk = sk; 44
45 skb->destructor = nf_tproxy_destructor; 45 skb_orphan(skb);
46 return 1; 46 skb->sk = sk;
47 } else 47 skb->destructor = nf_tproxy_destructor;
48 nf_tproxy_put_sock(sk);
49
50 return 0;
51} 48}
52EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); 49EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
53 50
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 640678f47a2a..dcfd57eb9d02 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -33,6 +33,20 @@
33#include <net/netfilter/nf_tproxy_core.h> 33#include <net/netfilter/nf_tproxy_core.h>
34#include <linux/netfilter/xt_TPROXY.h> 34#include <linux/netfilter/xt_TPROXY.h>
35 35
36static bool tproxy_sk_is_transparent(struct sock *sk)
37{
38 if (sk->sk_state != TCP_TIME_WAIT) {
39 if (inet_sk(sk)->transparent)
40 return true;
41 sock_put(sk);
42 } else {
43 if (inet_twsk(sk)->tw_transparent)
44 return true;
45 inet_twsk_put(inet_twsk(sk));
46 }
47 return false;
48}
49
36static inline __be32 50static inline __be32
37tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) 51tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
38{ 52{
@@ -141,7 +155,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
141 skb->dev, NFT_LOOKUP_LISTENER); 155 skb->dev, NFT_LOOKUP_LISTENER);
142 156
143 /* NOTE: assign_sock consumes our sk reference */ 157 /* NOTE: assign_sock consumes our sk reference */
144 if (sk && nf_tproxy_assign_sock(skb, sk)) { 158 if (sk && tproxy_sk_is_transparent(sk)) {
145 /* This should be in a separate target, but we don't do multiple 159 /* This should be in a separate target, but we don't do multiple
146 targets on the same rule yet */ 160 targets on the same rule yet */
147 skb->mark = (skb->mark & ~mark_mask) ^ mark_value; 161 skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
@@ -149,6 +163,8 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
149 pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n", 163 pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
150 iph->protocol, &iph->daddr, ntohs(hp->dest), 164 iph->protocol, &iph->daddr, ntohs(hp->dest),
151 &laddr, ntohs(lport), skb->mark); 165 &laddr, ntohs(lport), skb->mark);
166
167 nf_tproxy_assign_sock(skb, sk);
152 return NF_ACCEPT; 168 return NF_ACCEPT;
153 } 169 }
154 170
@@ -306,7 +322,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
306 par->in, NFT_LOOKUP_LISTENER); 322 par->in, NFT_LOOKUP_LISTENER);
307 323
308 /* NOTE: assign_sock consumes our sk reference */ 324 /* NOTE: assign_sock consumes our sk reference */
309 if (sk && nf_tproxy_assign_sock(skb, sk)) { 325 if (sk && tproxy_sk_is_transparent(sk)) {
310 /* This should be in a separate target, but we don't do multiple 326 /* This should be in a separate target, but we don't do multiple
311 targets on the same rule yet */ 327 targets on the same rule yet */
312 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; 328 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
@@ -314,6 +330,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
314 pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n", 330 pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
315 tproto, &iph->saddr, ntohs(hp->source), 331 tproto, &iph->saddr, ntohs(hp->source),
316 laddr, ntohs(lport), skb->mark); 332 laddr, ntohs(lport), skb->mark);
333
334 nf_tproxy_assign_sock(skb, sk);
317 return NF_ACCEPT; 335 return NF_ACCEPT;
318 } 336 }
319 337
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 00d6ae838303..9cc46356b577 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -35,6 +35,15 @@
35#include <net/netfilter/nf_conntrack.h> 35#include <net/netfilter/nf_conntrack.h>
36#endif 36#endif
37 37
38static void
39xt_socket_put_sk(struct sock *sk)
40{
41 if (sk->sk_state == TCP_TIME_WAIT)
42 inet_twsk_put(inet_twsk(sk));
43 else
44 sock_put(sk);
45}
46
38static int 47static int
39extract_icmp4_fields(const struct sk_buff *skb, 48extract_icmp4_fields(const struct sk_buff *skb,
40 u8 *protocol, 49 u8 *protocol,
@@ -164,7 +173,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
164 (sk->sk_state == TCP_TIME_WAIT && 173 (sk->sk_state == TCP_TIME_WAIT &&
165 inet_twsk(sk)->tw_transparent)); 174 inet_twsk(sk)->tw_transparent));
166 175
167 nf_tproxy_put_sock(sk); 176 xt_socket_put_sk(sk);
168 177
169 if (wildcard || !transparent) 178 if (wildcard || !transparent)
170 sk = NULL; 179 sk = NULL;
@@ -298,7 +307,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
298 (sk->sk_state == TCP_TIME_WAIT && 307 (sk->sk_state == TCP_TIME_WAIT &&
299 inet_twsk(sk)->tw_transparent)); 308 inet_twsk(sk)->tw_transparent));
300 309
301 nf_tproxy_put_sock(sk); 310 xt_socket_put_sk(sk);
302 311
303 if (wildcard || !transparent) 312 if (wildcard || !transparent)
304 sk = NULL; 313 sk = NULL;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 478181d53c55..1f924595bdef 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1407,7 +1407,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1407 int noblock = flags&MSG_DONTWAIT; 1407 int noblock = flags&MSG_DONTWAIT;
1408 size_t copied; 1408 size_t copied;
1409 struct sk_buff *skb, *data_skb; 1409 struct sk_buff *skb, *data_skb;
1410 int err; 1410 int err, ret;
1411 1411
1412 if (flags&MSG_OOB) 1412 if (flags&MSG_OOB)
1413 return -EOPNOTSUPP; 1413 return -EOPNOTSUPP;
@@ -1470,8 +1470,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1470 1470
1471 skb_free_datagram(sk, skb); 1471 skb_free_datagram(sk, skb);
1472 1472
1473 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) 1473 if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1474 netlink_dump(sk); 1474 ret = netlink_dump(sk);
1475 if (ret) {
1476 sk->sk_err = ret;
1477 sk->sk_error_report(sk);
1478 }
1479 }
1475 1480
1476 scm_recv(sock, msg, siocb->scm, flags); 1481 scm_recv(sock, msg, siocb->scm, flags);
1477out: 1482out:
@@ -1736,6 +1741,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1736 struct netlink_callback *cb; 1741 struct netlink_callback *cb;
1737 struct sock *sk; 1742 struct sock *sk;
1738 struct netlink_sock *nlk; 1743 struct netlink_sock *nlk;
1744 int ret;
1739 1745
1740 cb = kzalloc(sizeof(*cb), GFP_KERNEL); 1746 cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1741 if (cb == NULL) 1747 if (cb == NULL)
@@ -1764,9 +1770,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1764 nlk->cb = cb; 1770 nlk->cb = cb;
1765 mutex_unlock(nlk->cb_mutex); 1771 mutex_unlock(nlk->cb_mutex);
1766 1772
1767 netlink_dump(sk); 1773 ret = netlink_dump(sk);
1774
1768 sock_put(sk); 1775 sock_put(sk);
1769 1776
1777 if (ret)
1778 return ret;
1779
1770 /* We successfully started a dump, by returning -EINTR we 1780 /* We successfully started a dump, by returning -EINTR we
1771 * signal not to send ACK even if it was requested. 1781 * signal not to send ACK even if it was requested.
1772 */ 1782 */
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 71f373c421bc..c47a511f203d 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
551 if (conn->c_loopback 551 if (conn->c_loopback
552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 552 && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 553 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
554 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 554 scat = &rm->data.op_sg[sg];
555 ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
556 ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
557 return ret;
555 } 558 }
556 559
557 /* FIXME we may overallocate here */ 560 /* FIXME we may overallocate here */
diff --git a/net/rds/loop.c b/net/rds/loop.c
index aeec1d483b17..bca6761a3ca2 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
61 unsigned int hdr_off, unsigned int sg, 61 unsigned int hdr_off, unsigned int sg,
62 unsigned int off) 62 unsigned int off)
63{ 63{
64 struct scatterlist *sgp = &rm->data.op_sg[sg];
65 int ret = sizeof(struct rds_header) +
66 be32_to_cpu(rm->m_inc.i_hdr.h_len);
67
64 /* Do not send cong updates to loopback */ 68 /* Do not send cong updates to loopback */
65 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { 69 if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
66 rds_cong_map_updated(conn->c_fcong, ~(u64) 0); 70 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
67 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; 71 ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);
72 goto out;
68 } 73 }
69 74
70 BUG_ON(hdr_off || sg || off); 75 BUG_ON(hdr_off || sg || off);
@@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
80 NULL); 85 NULL);
81 86
82 rds_inc_put(&rm->m_inc); 87 rds_inc_put(&rm->m_inc);
83 88out:
84 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); 89 return ret;
85} 90}
86 91
87/* 92/*
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 89315009bab1..1a2b0633fece 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
423 goto protocol_error; 423 goto protocol_error;
424 } 424 }
425 425
426 case RXRPC_PACKET_TYPE_ACKALL:
426 case RXRPC_PACKET_TYPE_ACK: 427 case RXRPC_PACKET_TYPE_ACK:
427 /* ACK processing is done in process context */ 428 /* ACK processing is done in process context */
428 read_lock_bh(&call->state_lock); 429 read_lock_bh(&call->state_lock);
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 5ee16f0353fe..d763793d39de 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -89,11 +89,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr,
89 return ret; 89 return ret;
90 90
91 plen -= sizeof(*token); 91 plen -= sizeof(*token);
92 token = kmalloc(sizeof(*token), GFP_KERNEL); 92 token = kzalloc(sizeof(*token), GFP_KERNEL);
93 if (!token) 93 if (!token)
94 return -ENOMEM; 94 return -ENOMEM;
95 95
96 token->kad = kmalloc(plen, GFP_KERNEL); 96 token->kad = kzalloc(plen, GFP_KERNEL);
97 if (!token->kad) { 97 if (!token->kad) {
98 kfree(token); 98 kfree(token);
99 return -ENOMEM; 99 return -ENOMEM;
@@ -731,10 +731,10 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
731 goto error; 731 goto error;
732 732
733 ret = -ENOMEM; 733 ret = -ENOMEM;
734 token = kmalloc(sizeof(*token), GFP_KERNEL); 734 token = kzalloc(sizeof(*token), GFP_KERNEL);
735 if (!token) 735 if (!token)
736 goto error; 736 goto error;
737 token->kad = kmalloc(plen, GFP_KERNEL); 737 token->kad = kzalloc(plen, GFP_KERNEL);
738 if (!token->kad) 738 if (!token->kad)
739 goto error_free; 739 goto error_free;
740 740
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598440a2..1bc698039ae2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -839,6 +839,7 @@ void dev_deactivate(struct net_device *dev)
839 839
840 list_add(&dev->unreg_list, &single); 840 list_add(&dev->unreg_list, &single);
841 dev_deactivate_many(&single); 841 dev_deactivate_many(&single);
842 list_del(&single);
842} 843}
843 844
844static void dev_init_scheduler_queue(struct net_device *dev, 845static void dev_init_scheduler_queue(struct net_device *dev,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 2cc46f0962ca..b23428f3c0dd 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2029,11 +2029,11 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
2029 *errp = sctp_make_op_error_fixed(asoc, chunk); 2029 *errp = sctp_make_op_error_fixed(asoc, chunk);
2030 2030
2031 if (*errp) { 2031 if (*errp) {
2032 sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, 2032 if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
2033 WORD_ROUND(ntohs(param.p->length))); 2033 WORD_ROUND(ntohs(param.p->length))))
2034 sctp_addto_chunk_fixed(*errp, 2034 sctp_addto_chunk_fixed(*errp,
2035 WORD_ROUND(ntohs(param.p->length)), 2035 WORD_ROUND(ntohs(param.p->length)),
2036 param.v); 2036 param.v);
2037 } else { 2037 } else {
2038 /* If there is no memory for generating the ERROR 2038 /* If there is no memory for generating the ERROR
2039 * report as specified, an ABORT will be triggered 2039 * report as specified, an ABORT will be triggered
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 243fc09b164e..59e599498e37 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task)
252 252
253/* 253/*
254 * Mark an RPC call as having completed by clearing the 'active' bit 254 * Mark an RPC call as having completed by clearing the 'active' bit
255 * and then waking up all tasks that were sleeping.
255 */ 256 */
256static void rpc_mark_complete_task(struct rpc_task *task) 257static int rpc_complete_task(struct rpc_task *task)
257{ 258{
258 smp_mb__before_clear_bit(); 259 void *m = &task->tk_runstate;
260 wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
261 struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
262 unsigned long flags;
263 int ret;
264
265 spin_lock_irqsave(&wq->lock, flags);
259 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate); 266 clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
260 smp_mb__after_clear_bit(); 267 ret = atomic_dec_and_test(&task->tk_count);
261 wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); 268 if (waitqueue_active(wq))
269 __wake_up_locked_key(wq, TASK_NORMAL, &k);
270 spin_unlock_irqrestore(&wq->lock, flags);
271 return ret;
262} 272}
263 273
264/* 274/*
265 * Allow callers to wait for completion of an RPC call 275 * Allow callers to wait for completion of an RPC call
276 *
277 * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
278 * to enforce taking of the wq->lock and hence avoid races with
279 * rpc_complete_task().
266 */ 280 */
267int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) 281int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
268{ 282{
269 if (action == NULL) 283 if (action == NULL)
270 action = rpc_wait_bit_killable; 284 action = rpc_wait_bit_killable;
271 return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, 285 return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
272 action, TASK_KILLABLE); 286 action, TASK_KILLABLE);
273} 287}
274EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); 288EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
@@ -857,34 +871,67 @@ static void rpc_async_release(struct work_struct *work)
857 rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); 871 rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
858} 872}
859 873
860void rpc_put_task(struct rpc_task *task) 874static void rpc_release_resources_task(struct rpc_task *task)
861{ 875{
862 if (!atomic_dec_and_test(&task->tk_count))
863 return;
864 /* Release resources */
865 if (task->tk_rqstp) 876 if (task->tk_rqstp)
866 xprt_release(task); 877 xprt_release(task);
867 if (task->tk_msg.rpc_cred) 878 if (task->tk_msg.rpc_cred)
868 put_rpccred(task->tk_msg.rpc_cred); 879 put_rpccred(task->tk_msg.rpc_cred);
869 rpc_task_release_client(task); 880 rpc_task_release_client(task);
870 if (task->tk_workqueue != NULL) { 881}
882
883static void rpc_final_put_task(struct rpc_task *task,
884 struct workqueue_struct *q)
885{
886 if (q != NULL) {
871 INIT_WORK(&task->u.tk_work, rpc_async_release); 887 INIT_WORK(&task->u.tk_work, rpc_async_release);
872 queue_work(task->tk_workqueue, &task->u.tk_work); 888 queue_work(q, &task->u.tk_work);
873 } else 889 } else
874 rpc_free_task(task); 890 rpc_free_task(task);
875} 891}
892
893static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
894{
895 if (atomic_dec_and_test(&task->tk_count)) {
896 rpc_release_resources_task(task);
897 rpc_final_put_task(task, q);
898 }
899}
900
901void rpc_put_task(struct rpc_task *task)
902{
903 rpc_do_put_task(task, NULL);
904}
876EXPORT_SYMBOL_GPL(rpc_put_task); 905EXPORT_SYMBOL_GPL(rpc_put_task);
877 906
907void rpc_put_task_async(struct rpc_task *task)
908{
909 rpc_do_put_task(task, task->tk_workqueue);
910}
911EXPORT_SYMBOL_GPL(rpc_put_task_async);
912
878static void rpc_release_task(struct rpc_task *task) 913static void rpc_release_task(struct rpc_task *task)
879{ 914{
880 dprintk("RPC: %5u release task\n", task->tk_pid); 915 dprintk("RPC: %5u release task\n", task->tk_pid);
881 916
882 BUG_ON (RPC_IS_QUEUED(task)); 917 BUG_ON (RPC_IS_QUEUED(task));
883 918
884 /* Wake up anyone who is waiting for task completion */ 919 rpc_release_resources_task(task);
885 rpc_mark_complete_task(task);
886 920
887 rpc_put_task(task); 921 /*
922 * Note: at this point we have been removed from rpc_clnt->cl_tasks,
923 * so it should be safe to use task->tk_count as a test for whether
924 * or not any other processes still hold references to our rpc_task.
925 */
926 if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
927 /* Wake up anyone who may be waiting for task completion */
928 if (!rpc_complete_task(task))
929 return;
930 } else {
931 if (!atomic_dec_and_test(&task->tk_count))
932 return;
933 }
934 rpc_final_put_task(task, task->tk_workqueue);
888} 935}
889 936
890int rpciod_up(void) 937int rpciod_up(void)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9df1eadc912a..1a10dcd999ea 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1335 p, 0, length, DMA_FROM_DEVICE); 1335 p, 0, length, DMA_FROM_DEVICE);
1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { 1336 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1337 put_page(p); 1337 put_page(p);
1338 svc_rdma_put_context(ctxt, 1);
1338 return; 1339 return;
1339 } 1340 }
1340 atomic_inc(&xprt->sc_dma_used); 1341 atomic_inc(&xprt->sc_dma_used);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c431f5a57960..be96d429b475 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1631 } 1631 }
1632 xs_reclassify_socket(family, sock); 1632 xs_reclassify_socket(family, sock);
1633 1633
1634 if (xs_bind(transport, sock)) { 1634 err = xs_bind(transport, sock);
1635 if (err) {
1635 sock_release(sock); 1636 sock_release(sock);
1636 goto out; 1637 goto out;
1637 } 1638 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d286204..ba5b8c208498 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
850 * Get the parent directory, calculate the hash for last 850 * Get the parent directory, calculate the hash for last
851 * component. 851 * component.
852 */ 852 */
853 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); 853 err = kern_path_parent(sunaddr->sun_path, &nd);
854 if (err) 854 if (err)
855 goto out_mknod_parent; 855 goto out_mknod_parent;
856 856
@@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1724 1724
1725 msg->msg_namelen = 0; 1725 msg->msg_namelen = 0;
1726 1726
1727 mutex_lock(&u->readlock); 1727 err = mutex_lock_interruptible(&u->readlock);
1728 if (err) {
1729 err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1730 goto out;
1731 }
1728 1732
1729 skb = skb_recv_datagram(sk, flags, noblock, &err); 1733 skb = skb_recv_datagram(sk, flags, noblock, &err);
1730 if (!skb) { 1734 if (!skb) {
@@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1864 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1868 memset(&tmp_scm, 0, sizeof(tmp_scm));
1865 } 1869 }
1866 1870
1867 mutex_lock(&u->readlock); 1871 err = mutex_lock_interruptible(&u->readlock);
1872 if (err) {
1873 err = sock_intr_errno(timeo);
1874 goto out;
1875 }
1868 1876
1869 do { 1877 do {
1870 int chunk; 1878 int chunk;
@@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1895 1903
1896 timeo = unix_stream_data_wait(sk, timeo); 1904 timeo = unix_stream_data_wait(sk, timeo);
1897 1905
1898 if (signal_pending(current)) { 1906 if (signal_pending(current)
1907 || mutex_lock_interruptible(&u->readlock)) {
1899 err = sock_intr_errno(timeo); 1908 err = sock_intr_errno(timeo);
1900 goto out; 1909 goto out;
1901 } 1910 }
1902 mutex_lock(&u->readlock); 1911
1903 continue; 1912 continue;
1904 unlock: 1913 unlock:
1905 unix_state_unlock(sk); 1914 unix_state_unlock(sk);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828e..b6f4b994eb35 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
104 /* 104 /*
105 * Socket ? 105 * Socket ?
106 */ 106 */
107 if (S_ISSOCK(inode->i_mode)) { 107 if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
108 struct socket *sock = SOCKET_I(inode); 108 struct socket *sock = SOCKET_I(inode);
109 struct sock *s = sock->sk; 109 struct sock *s = sock->sk;
110 110
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 3e5dbd4e4cd5..d112f038edf0 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -802,11 +802,11 @@ int cfg80211_wext_siwfreq(struct net_device *dev,
802 return freq; 802 return freq;
803 if (freq == 0) 803 if (freq == 0)
804 return -EINVAL; 804 return -EINVAL;
805 wdev_lock(wdev);
806 mutex_lock(&rdev->devlist_mtx); 805 mutex_lock(&rdev->devlist_mtx);
806 wdev_lock(wdev);
807 err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); 807 err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
808 mutex_unlock(&rdev->devlist_mtx);
809 wdev_unlock(wdev); 808 wdev_unlock(wdev);
809 mutex_unlock(&rdev->devlist_mtx);
810 return err; 810 return err;
811 default: 811 default:
812 return -EOPNOTSUPP; 812 return -EOPNOTSUPP;
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index 6c94c6ce2925..291228e25984 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -309,6 +309,11 @@ static void do_config_file(const char *filename)
309 close(fd); 309 close(fd);
310} 310}
311 311
312/*
313 * Important: The below generated source_foo.o and deps_foo.o variable
314 * assignments are parsed not only by make, but also by the rather simple
315 * parser in scripts/mod/sumversion.c.
316 */
312static void parse_dep_file(void *map, size_t len) 317static void parse_dep_file(void *map, size_t len)
313{ 318{
314 char *m = map; 319 char *m = map;
@@ -323,7 +328,6 @@ static void parse_dep_file(void *map, size_t len)
323 exit(1); 328 exit(1);
324 } 329 }
325 memcpy(s, m, p-m); s[p-m] = 0; 330 memcpy(s, m, p-m); s[p-m] = 0;
326 printf("deps_%s := \\\n", target);
327 m = p+1; 331 m = p+1;
328 332
329 clear_config(); 333 clear_config();
@@ -343,12 +347,15 @@ static void parse_dep_file(void *map, size_t len)
343 strrcmp(s, "arch/um/include/uml-config.h") && 347 strrcmp(s, "arch/um/include/uml-config.h") &&
344 strrcmp(s, ".ver")) { 348 strrcmp(s, ".ver")) {
345 /* 349 /*
346 * Do not output the first dependency (the 350 * Do not list the source file as dependency, so that
347 * source file), so that kbuild is not confused 351 * kbuild is not confused if a .c file is rewritten
348 * if a .c file is rewritten into .S or vice 352 * into .S or vice versa. Storing it in source_* is
349 * versa. 353 * needed for modpost to compute srcversions.
350 */ 354 */
351 if (!first) 355 if (first) {
356 printf("source_%s := %s\n\n", target, s);
357 printf("deps_%s := \\\n", target);
358 } else
352 printf(" %s \\\n", s); 359 printf(" %s \\\n", s);
353 do_config_file(s); 360 do_config_file(s);
354 } 361 }
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4c0383da1c9a..58848e3e392c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2654,11 +2654,6 @@ sub process {
2654 WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr); 2654 WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
2655 } 2655 }
2656 2656
2657# SPIN_LOCK_UNLOCKED & RW_LOCK_UNLOCKED are deprecated
2658 if ($line =~ /\b(SPIN_LOCK_UNLOCKED|RW_LOCK_UNLOCKED)/) {
2659 ERROR("Use of $1 is deprecated: see Documentation/spinlocks.txt\n" . $herecurr);
2660 }
2661
2662# warn about #if 0 2657# warn about #if 0
2663 if ($line =~ /^.\s*\#\s*if\s+0\b/) { 2658 if ($line =~ /^.\s*\#\s*if\s+0\b/) {
2664 CHK("if this code is redundant consider removing it\n" . 2659 CHK("if this code is redundant consider removing it\n" .
diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index fd81fc33d633..a4fe923c0131 100644
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -1,6 +1,6 @@
1#!/usr/bin/perl -w 1#!/usr/bin/perl -w
2# 2#
3# Copywrite 2005-2009 - Steven Rostedt 3# Copyright 2005-2009 - Steven Rostedt
4# Licensed under the terms of the GNU GPL License version 2 4# Licensed under the terms of the GNU GPL License version 2
5# 5#
6# It's simple enough to figure out how this works. 6# It's simple enough to figure out how this works.
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index ecf9c7dc1825..9dfcd6d988da 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -300,8 +300,8 @@ static int is_static_library(const char *objfile)
300 return 0; 300 return 0;
301} 301}
302 302
303/* We have dir/file.o. Open dir/.file.o.cmd, look for deps_ line to 303/* We have dir/file.o. Open dir/.file.o.cmd, look for source_ and deps_ line
304 * figure out source file. */ 304 * to figure out source files. */
305static int parse_source_files(const char *objfile, struct md4_ctx *md) 305static int parse_source_files(const char *objfile, struct md4_ctx *md)
306{ 306{
307 char *cmd, *file, *line, *dir; 307 char *cmd, *file, *line, *dir;
@@ -340,6 +340,21 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
340 */ 340 */
341 while ((line = get_next_line(&pos, file, flen)) != NULL) { 341 while ((line = get_next_line(&pos, file, flen)) != NULL) {
342 char* p = line; 342 char* p = line;
343
344 if (strncmp(line, "source_", sizeof("source_")-1) == 0) {
345 p = strrchr(line, ' ');
346 if (!p) {
347 warn("malformed line: %s\n", line);
348 goto out_file;
349 }
350 p++;
351 if (!parse_file(p, md)) {
352 warn("could not open %s: %s\n",
353 p, strerror(errno));
354 goto out_file;
355 }
356 continue;
357 }
343 if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) { 358 if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) {
344 check_files = 1; 359 check_files = 1;
345 continue; 360 continue;
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 038b3d1e2981..f9f6f52db772 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -206,7 +206,8 @@ static uint32_t (*w2)(uint16_t);
206static int 206static int
207is_mcounted_section_name(char const *const txtname) 207is_mcounted_section_name(char const *const txtname)
208{ 208{
209 return 0 == strcmp(".text", txtname) || 209 return 0 == strcmp(".text", txtname) ||
210 0 == strcmp(".ref.text", txtname) ||
210 0 == strcmp(".sched.text", txtname) || 211 0 == strcmp(".sched.text", txtname) ||
211 0 == strcmp(".spinlock.text", txtname) || 212 0 == strcmp(".spinlock.text", txtname) ||
212 0 == strcmp(".irqentry.text", txtname) || 213 0 == strcmp(".irqentry.text", txtname) ||
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 1d7963f4ee79..4be0deea71ca 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -130,6 +130,7 @@ if ($inputfile =~ m,kernel/trace/ftrace\.o$,) {
130# Acceptable sections to record. 130# Acceptable sections to record.
131my %text_sections = ( 131my %text_sections = (
132 ".text" => 1, 132 ".text" => 1,
133 ".ref.text" => 1,
133 ".sched.text" => 1, 134 ".sched.text" => 1,
134 ".spinlock.text" => 1, 135 ".spinlock.text" => 1,
135 ".irqentry.text" => 1, 136 ".irqentry.text" => 1,
diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
index 44423b4dcb82..8c81d76959ee 100644
--- a/scripts/rt-tester/rt-tester.py
+++ b/scripts/rt-tester/rt-tester.py
@@ -33,8 +33,6 @@ cmd_opcodes = {
33 "lockintnowait" : "6", 33 "lockintnowait" : "6",
34 "lockcont" : "7", 34 "lockcont" : "7",
35 "unlock" : "8", 35 "unlock" : "8",
36 "lockbkl" : "9",
37 "unlockbkl" : "10",
38 "signal" : "11", 36 "signal" : "11",
39 "resetevent" : "98", 37 "resetevent" : "98",
40 "reset" : "99", 38 "reset" : "99",
diff --git a/scripts/rt-tester/t2-l1-2rt-sameprio.tst b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
index 8821f27cc8be..3710c8b2090d 100644
--- a/scripts/rt-tester/t2-l1-2rt-sameprio.tst
+++ b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-pi.tst b/scripts/rt-tester/t2-l1-pi.tst
index cde1f189a02b..b4cc95975adb 100644
--- a/scripts/rt-tester/t2-l1-pi.tst
+++ b/scripts/rt-tester/t2-l1-pi.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-signal.tst b/scripts/rt-tester/t2-l1-signal.tst
index 3ab0bfc49950..1b57376cc1f7 100644
--- a/scripts/rt-tester/t2-l1-signal.tst
+++ b/scripts/rt-tester/t2-l1-signal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t2-l2-2rt-deadlock.tst b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
index f4b5d5d6215f..68b10629b6f4 100644
--- a/scripts/rt-tester/t2-l2-2rt-deadlock.tst
+++ b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal 0 22# signal 0
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-1rt.tst b/scripts/rt-tester/t3-l1-pi-1rt.tst
index 63440ca2cce9..8e6c8b11ae56 100644
--- a/scripts/rt-tester/t3-l1-pi-1rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-1rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-2rt.tst b/scripts/rt-tester/t3-l1-pi-2rt.tst
index e5816fe67df3..69c2212fc520 100644
--- a/scripts/rt-tester/t3-l1-pi-2rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-2rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-3rt.tst b/scripts/rt-tester/t3-l1-pi-3rt.tst
index 718b82b5d3bb..9b0f1eb26a88 100644
--- a/scripts/rt-tester/t3-l1-pi-3rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-3rt.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-signal.tst b/scripts/rt-tester/t3-l1-pi-signal.tst
index c6e213563498..39ec74ab06ee 100644
--- a/scripts/rt-tester/t3-l1-pi-signal.tst
+++ b/scripts/rt-tester/t3-l1-pi-signal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-steal.tst b/scripts/rt-tester/t3-l1-pi-steal.tst
index f53749d59d79..e03db7e010fa 100644
--- a/scripts/rt-tester/t3-l1-pi-steal.tst
+++ b/scripts/rt-tester/t3-l1-pi-steal.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t3-l2-pi.tst b/scripts/rt-tester/t3-l2-pi.tst
index cdc3e4fd7bac..7b59100d3e48 100644
--- a/scripts/rt-tester/t3-l2-pi.tst
+++ b/scripts/rt-tester/t3-l2-pi.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t4-l2-pi-deboost.tst b/scripts/rt-tester/t4-l2-pi-deboost.tst
index baa14137f473..2f0e049d6443 100644
--- a/scripts/rt-tester/t4-l2-pi-deboost.tst
+++ b/scripts/rt-tester/t4-l2-pi-deboost.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
index e6ec0c81b54d..04f4034ff895 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
index ca64f8bbf4bc..a48a6ee29ddc 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
@@ -19,8 +19,6 @@
19# lockintnowait lock nr (0-7) 19# lockintnowait lock nr (0-7)
20# lockcont lock nr (0-7) 20# lockcont lock nr (0-7)
21# unlock lock nr (0-7) 21# unlock lock nr (0-7)
22# lockbkl lock nr (0-7)
23# unlockbkl lock nr (0-7)
24# signal thread to signal (0-7) 22# signal thread to signal (0-7)
25# reset 0 23# reset 0
26# resetevent 0 24# resetevent 0
@@ -39,9 +37,6 @@
39# blocked lock nr (0-7) 37# blocked lock nr (0-7)
40# blockedwake lock nr (0-7) 38# blockedwake lock nr (0-7)
41# unlocked lock nr (0-7) 39# unlocked lock nr (0-7)
42# lockedbkl dont care
43# blockedbkl dont care
44# unlockedbkl dont care
45# opcodeeq command opcode or number 40# opcodeeq command opcode or number
46# opcodelt number 41# opcodelt number
47# opcodegt number 42# opcodegt number
diff --git a/security/commoncap.c b/security/commoncap.c
index 64c2ed9c9015..dbfdaed4cc66 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -93,7 +93,7 @@ int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
93 * Determine whether the current process may set the system clock and timezone 93 * Determine whether the current process may set the system clock and timezone
94 * information, returning 0 if permission granted, -ve if denied. 94 * information, returning 0 if permission granted, -ve if denied.
95 */ 95 */
96int cap_settime(struct timespec *ts, struct timezone *tz) 96int cap_settime(const struct timespec *ts, const struct timezone *tz)
97{ 97{
98 if (!capable(CAP_SYS_TIME)) 98 if (!capable(CAP_SYS_TIME))
99 return -EPERM; 99 return -EPERM;
diff --git a/security/security.c b/security/security.c
index 7b7308ace8c5..bb33ecadcf95 100644
--- a/security/security.c
+++ b/security/security.c
@@ -201,7 +201,7 @@ int security_syslog(int type)
201 return security_ops->syslog(type); 201 return security_ops->syslog(type);
202} 202}
203 203
204int security_settime(struct timespec *ts, struct timezone *tz) 204int security_settime(const struct timespec *ts, const struct timezone *tz)
205{ 205{
206 return security_ops->settime(ts, tz); 206 return security_ops->settime(ts, tz);
207} 207}
diff --git a/sound/core/jack.c b/sound/core/jack.c
index 4902ae568730..53b53e97c896 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -141,6 +141,7 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
141 141
142fail_input: 142fail_input:
143 input_free_device(jack->input_dev); 143 input_free_device(jack->input_dev);
144 kfree(jack->id);
144 kfree(jack); 145 kfree(jack);
145 return err; 146 return err;
146} 147}
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index a07b031090d8..067982f4f182 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -1039,9 +1039,11 @@ static struct hda_verb cs_errata_init_verbs[] = {
1039 {0x11, AC_VERB_SET_PROC_COEF, 0x0008}, 1039 {0x11, AC_VERB_SET_PROC_COEF, 0x0008},
1040 {0x11, AC_VERB_SET_PROC_STATE, 0x00}, 1040 {0x11, AC_VERB_SET_PROC_STATE, 0x00},
1041 1041
1042#if 0 /* Don't to set to D3 as we are in power-up sequence */
1042 {0x07, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Rx: D3 */ 1043 {0x07, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Rx: D3 */
1043 {0x08, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Tx: D3 */ 1044 {0x08, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Tx: D3 */
1044 /*{0x01, AC_VERB_SET_POWER_STATE, 0x03},*/ /* AFG: D3 This is already handled */ 1045 /*{0x01, AC_VERB_SET_POWER_STATE, 0x03},*/ /* AFG: D3 This is already handled */
1046#endif
1045 1047
1046 {} /* terminator */ 1048 {} /* terminator */
1047}; 1049};
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index dd7c5c12225d..4d5004e693f0 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -3114,6 +3114,8 @@ static struct snd_pci_quirk cxt5066_cfg_tbl[] = {
3114 SND_PCI_QUIRK(0x1028, 0x0401, "Dell Vostro 1014", CXT5066_DELL_VOSTRO), 3114 SND_PCI_QUIRK(0x1028, 0x0401, "Dell Vostro 1014", CXT5066_DELL_VOSTRO),
3115 SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTRO), 3115 SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTRO),
3116 SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD), 3116 SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD),
3117 SND_PCI_QUIRK(0x1028, 0x050f, "Dell Inspiron", CXT5066_IDEAPAD),
3118 SND_PCI_QUIRK(0x1028, 0x0510, "Dell Vostro", CXT5066_IDEAPAD),
3117 SND_PCI_QUIRK(0x103c, 0x360b, "HP G60", CXT5066_HP_LAPTOP), 3119 SND_PCI_QUIRK(0x103c, 0x360b, "HP G60", CXT5066_HP_LAPTOP),
3118 SND_PCI_QUIRK(0x1043, 0x13f3, "Asus A52J", CXT5066_ASUS), 3120 SND_PCI_QUIRK(0x1043, 0x13f3, "Asus A52J", CXT5066_ASUS),
3119 SND_PCI_QUIRK(0x1043, 0x1643, "Asus K52JU", CXT5066_ASUS), 3121 SND_PCI_QUIRK(0x1043, 0x1643, "Asus K52JU", CXT5066_ASUS),
@@ -3937,6 +3939,8 @@ static struct hda_codec_preset snd_hda_preset_conexant[] = {
3937 .patch = patch_cxt5066 }, 3939 .patch = patch_cxt5066 },
3938 { .id = 0x14f15069, .name = "CX20585", 3940 { .id = 0x14f15069, .name = "CX20585",
3939 .patch = patch_cxt5066 }, 3941 .patch = patch_cxt5066 },
3942 { .id = 0x14f1506e, .name = "CX20590",
3943 .patch = patch_cxt5066 },
3940 { .id = 0x14f15097, .name = "CX20631", 3944 { .id = 0x14f15097, .name = "CX20631",
3941 .patch = patch_conexant_auto }, 3945 .patch = patch_conexant_auto },
3942 { .id = 0x14f15098, .name = "CX20632", 3946 { .id = 0x14f15098, .name = "CX20632",
@@ -3963,6 +3967,7 @@ MODULE_ALIAS("snd-hda-codec-id:14f15066");
3963MODULE_ALIAS("snd-hda-codec-id:14f15067"); 3967MODULE_ALIAS("snd-hda-codec-id:14f15067");
3964MODULE_ALIAS("snd-hda-codec-id:14f15068"); 3968MODULE_ALIAS("snd-hda-codec-id:14f15068");
3965MODULE_ALIAS("snd-hda-codec-id:14f15069"); 3969MODULE_ALIAS("snd-hda-codec-id:14f15069");
3970MODULE_ALIAS("snd-hda-codec-id:14f1506e");
3966MODULE_ALIAS("snd-hda-codec-id:14f15097"); 3971MODULE_ALIAS("snd-hda-codec-id:14f15097");
3967MODULE_ALIAS("snd-hda-codec-id:14f15098"); 3972MODULE_ALIAS("snd-hda-codec-id:14f15098");
3968MODULE_ALIAS("snd-hda-codec-id:14f150a1"); 3973MODULE_ALIAS("snd-hda-codec-id:14f150a1");
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index a58767736727..ec0fa2dd0a27 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1634,6 +1634,9 @@ static struct hda_codec_preset snd_hda_preset_hdmi[] = {
1634{ .id = 0x10de0012, .name = "GPU 12 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1634{ .id = 0x10de0012, .name = "GPU 12 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1635{ .id = 0x10de0013, .name = "GPU 13 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1635{ .id = 0x10de0013, .name = "GPU 13 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1636{ .id = 0x10de0014, .name = "GPU 14 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1636{ .id = 0x10de0014, .name = "GPU 14 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1637{ .id = 0x10de0015, .name = "GPU 15 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1638{ .id = 0x10de0016, .name = "GPU 16 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1639/* 17 is known to be absent */
1637{ .id = 0x10de0018, .name = "GPU 18 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1640{ .id = 0x10de0018, .name = "GPU 18 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1638{ .id = 0x10de0019, .name = "GPU 19 HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1641{ .id = 0x10de0019, .name = "GPU 19 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
1639{ .id = 0x10de001a, .name = "GPU 1a HDMI/DP", .patch = patch_nvhdmi_8ch_89 }, 1642{ .id = 0x10de001a, .name = "GPU 1a HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
@@ -1676,6 +1679,8 @@ MODULE_ALIAS("snd-hda-codec-id:10de0011");
1676MODULE_ALIAS("snd-hda-codec-id:10de0012"); 1679MODULE_ALIAS("snd-hda-codec-id:10de0012");
1677MODULE_ALIAS("snd-hda-codec-id:10de0013"); 1680MODULE_ALIAS("snd-hda-codec-id:10de0013");
1678MODULE_ALIAS("snd-hda-codec-id:10de0014"); 1681MODULE_ALIAS("snd-hda-codec-id:10de0014");
1682MODULE_ALIAS("snd-hda-codec-id:10de0015");
1683MODULE_ALIAS("snd-hda-codec-id:10de0016");
1679MODULE_ALIAS("snd-hda-codec-id:10de0018"); 1684MODULE_ALIAS("snd-hda-codec-id:10de0018");
1680MODULE_ALIAS("snd-hda-codec-id:10de0019"); 1685MODULE_ALIAS("snd-hda-codec-id:10de0019");
1681MODULE_ALIAS("snd-hda-codec-id:10de001a"); 1686MODULE_ALIAS("snd-hda-codec-id:10de001a");
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3328a259a242..4261bb8eec1d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1133,11 +1133,8 @@ static void alc_automute_speaker(struct hda_codec *codec, int pinctl)
1133 nid = spec->autocfg.hp_pins[i]; 1133 nid = spec->autocfg.hp_pins[i];
1134 if (!nid) 1134 if (!nid)
1135 break; 1135 break;
1136 if (snd_hda_jack_detect(codec, nid)) { 1136 alc_report_jack(codec, nid);
1137 spec->jack_present = 1; 1137 spec->jack_present |= snd_hda_jack_detect(codec, nid);
1138 break;
1139 }
1140 alc_report_jack(codec, spec->autocfg.hp_pins[i]);
1141 } 1138 }
1142 1139
1143 mute = spec->jack_present ? HDA_AMP_MUTE : 0; 1140 mute = spec->jack_present ? HDA_AMP_MUTE : 0;
@@ -15015,7 +15012,7 @@ static struct snd_pci_quirk alc269_cfg_tbl[] = {
15015 SND_PCI_QUIRK(0x1043, 0x11e3, "ASUS U33Jc", ALC269VB_AMIC), 15012 SND_PCI_QUIRK(0x1043, 0x11e3, "ASUS U33Jc", ALC269VB_AMIC),
15016 SND_PCI_QUIRK(0x1043, 0x1273, "ASUS UL80Jt", ALC269VB_AMIC), 15013 SND_PCI_QUIRK(0x1043, 0x1273, "ASUS UL80Jt", ALC269VB_AMIC),
15017 SND_PCI_QUIRK(0x1043, 0x1283, "ASUS U53Jc", ALC269_AMIC), 15014 SND_PCI_QUIRK(0x1043, 0x1283, "ASUS U53Jc", ALC269_AMIC),
15018 SND_PCI_QUIRK(0x1043, 0x12b3, "ASUS N82Jv", ALC269_AMIC), 15015 SND_PCI_QUIRK(0x1043, 0x12b3, "ASUS N82JV", ALC269VB_AMIC),
15019 SND_PCI_QUIRK(0x1043, 0x12d3, "ASUS N61Jv", ALC269_AMIC), 15016 SND_PCI_QUIRK(0x1043, 0x12d3, "ASUS N61Jv", ALC269_AMIC),
15020 SND_PCI_QUIRK(0x1043, 0x13a3, "ASUS UL30Vt", ALC269_AMIC), 15017 SND_PCI_QUIRK(0x1043, 0x13a3, "ASUS UL30Vt", ALC269_AMIC),
15021 SND_PCI_QUIRK(0x1043, 0x1373, "ASUS G73JX", ALC269_AMIC), 15018 SND_PCI_QUIRK(0x1043, 0x1373, "ASUS G73JX", ALC269_AMIC),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 9ea48b425d0b..bd7b123f6440 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -586,7 +586,12 @@ static hda_nid_t stac92hd83xxx_pin_nids[10] = {
586 0x0f, 0x10, 0x11, 0x1f, 0x20, 586 0x0f, 0x10, 0x11, 0x1f, 0x20,
587}; 587};
588 588
589static hda_nid_t stac92hd88xxx_pin_nids[10] = { 589static hda_nid_t stac92hd87xxx_pin_nids[6] = {
590 0x0a, 0x0b, 0x0c, 0x0d,
591 0x0f, 0x11,
592};
593
594static hda_nid_t stac92hd88xxx_pin_nids[8] = {
590 0x0a, 0x0b, 0x0c, 0x0d, 595 0x0a, 0x0b, 0x0c, 0x0d,
591 0x0f, 0x11, 0x1f, 0x20, 596 0x0f, 0x11, 0x1f, 0x20,
592}; 597};
@@ -5430,12 +5435,13 @@ again:
5430 switch (codec->vendor_id) { 5435 switch (codec->vendor_id) {
5431 case 0x111d76d1: 5436 case 0x111d76d1:
5432 case 0x111d76d9: 5437 case 0x111d76d9:
5438 case 0x111d76e5:
5433 spec->dmic_nids = stac92hd87b_dmic_nids; 5439 spec->dmic_nids = stac92hd87b_dmic_nids;
5434 spec->num_dmics = stac92xx_connected_ports(codec, 5440 spec->num_dmics = stac92xx_connected_ports(codec,
5435 stac92hd87b_dmic_nids, 5441 stac92hd87b_dmic_nids,
5436 STAC92HD87B_NUM_DMICS); 5442 STAC92HD87B_NUM_DMICS);
5437 spec->num_pins = ARRAY_SIZE(stac92hd88xxx_pin_nids); 5443 spec->num_pins = ARRAY_SIZE(stac92hd87xxx_pin_nids);
5438 spec->pin_nids = stac92hd88xxx_pin_nids; 5444 spec->pin_nids = stac92hd87xxx_pin_nids;
5439 spec->mono_nid = 0; 5445 spec->mono_nid = 0;
5440 spec->num_pwrs = 0; 5446 spec->num_pwrs = 0;
5441 break; 5447 break;
@@ -5443,6 +5449,7 @@ again:
5443 case 0x111d7667: 5449 case 0x111d7667:
5444 case 0x111d7668: 5450 case 0x111d7668:
5445 case 0x111d7669: 5451 case 0x111d7669:
5452 case 0x111d76e3:
5446 spec->num_dmics = stac92xx_connected_ports(codec, 5453 spec->num_dmics = stac92xx_connected_ports(codec,
5447 stac92hd88xxx_dmic_nids, 5454 stac92hd88xxx_dmic_nids,
5448 STAC92HD88XXX_NUM_DMICS); 5455 STAC92HD88XXX_NUM_DMICS);
@@ -6387,6 +6394,8 @@ static struct hda_codec_preset snd_hda_preset_sigmatel[] = {
6387 { .id = 0x111d76cd, .name = "92HD89F2", .patch = patch_stac92hd73xx }, 6394 { .id = 0x111d76cd, .name = "92HD89F2", .patch = patch_stac92hd73xx },
6388 { .id = 0x111d76ce, .name = "92HD89F1", .patch = patch_stac92hd73xx }, 6395 { .id = 0x111d76ce, .name = "92HD89F1", .patch = patch_stac92hd73xx },
6389 { .id = 0x111d76e0, .name = "92HD91BXX", .patch = patch_stac92hd83xxx}, 6396 { .id = 0x111d76e0, .name = "92HD91BXX", .patch = patch_stac92hd83xxx},
6397 { .id = 0x111d76e3, .name = "92HD98BXX", .patch = patch_stac92hd83xxx},
6398 { .id = 0x111d76e5, .name = "92HD99BXX", .patch = patch_stac92hd83xxx},
6390 { .id = 0x111d76e7, .name = "92HD90BXX", .patch = patch_stac92hd83xxx}, 6399 { .id = 0x111d76e7, .name = "92HD90BXX", .patch = patch_stac92hd83xxx},
6391 {} /* terminator */ 6400 {} /* terminator */
6392}; 6401};
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index a76c3260d941..63b0054200a8 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -567,7 +567,7 @@ static void via_auto_init_analog_input(struct hda_codec *codec)
567 hda_nid_t nid = cfg->inputs[i].pin; 567 hda_nid_t nid = cfg->inputs[i].pin;
568 if (spec->smart51_enabled && is_smart51_pins(spec, nid)) 568 if (spec->smart51_enabled && is_smart51_pins(spec, nid))
569 ctl = PIN_OUT; 569 ctl = PIN_OUT;
570 else if (i == AUTO_PIN_MIC) 570 else if (cfg->inputs[i].type == AUTO_PIN_MIC)
571 ctl = PIN_VREF50; 571 ctl = PIN_VREF50;
572 else 572 else
573 ctl = PIN_IN; 573 ctl = PIN_IN;
diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c
index bb4bf65b9e7e..0bb424af956f 100644
--- a/sound/soc/codecs/cx20442.c
+++ b/sound/soc/codecs/cx20442.c
@@ -367,7 +367,7 @@ static int cx20442_codec_remove(struct snd_soc_codec *codec)
367 return 0; 367 return 0;
368} 368}
369 369
370static const u8 cx20442_reg = CX20442_TELOUT | CX20442_MIC; 370static const u8 cx20442_reg;
371 371
372static struct snd_soc_codec_driver cx20442_codec_dev = { 372static struct snd_soc_codec_driver cx20442_codec_dev = {
373 .probe = cx20442_codec_probe, 373 .probe = cx20442_codec_probe,
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index 987476a5895f..017d99ceb42e 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -1482,7 +1482,7 @@ int wm8903_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
1482 WM8903_MICDET_EINT | WM8903_MICSHRT_EINT, 1482 WM8903_MICDET_EINT | WM8903_MICSHRT_EINT,
1483 irq_mask); 1483 irq_mask);
1484 1484
1485 if (det && shrt) { 1485 if (det || shrt) {
1486 /* Enable mic detection, this may not have been set through 1486 /* Enable mic detection, this may not have been set through
1487 * platform data (eg, if the defaults are OK). */ 1487 * platform data (eg, if the defaults are OK). */
1488 snd_soc_update_bits(codec, WM8903_WRITE_SEQUENCER_0, 1488 snd_soc_update_bits(codec, WM8903_WRITE_SEQUENCER_0,
diff --git a/sound/soc/codecs/wm8903.h b/sound/soc/codecs/wm8903.h
index e8490f3edd03..e3ec2433b215 100644
--- a/sound/soc/codecs/wm8903.h
+++ b/sound/soc/codecs/wm8903.h
@@ -165,7 +165,7 @@ extern int wm8903_mic_detect(struct snd_soc_codec *codec,
165 165
166#define WM8903_VMID_RES_50K 2 166#define WM8903_VMID_RES_50K 2
167#define WM8903_VMID_RES_250K 3 167#define WM8903_VMID_RES_250K 3
168#define WM8903_VMID_RES_5K 4 168#define WM8903_VMID_RES_5K 6
169 169
170/* 170/*
171 * R8 (0x08) - Analogue DAC 0 171 * R8 (0x08) - Analogue DAC 0
diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
index 4bbc3442703f..8dfb0a0da673 100644
--- a/sound/soc/codecs/wm8978.c
+++ b/sound/soc/codecs/wm8978.c
@@ -145,18 +145,18 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
145 SOC_SINGLE("DAC Playback Limiter Threshold", 145 SOC_SINGLE("DAC Playback Limiter Threshold",
146 WM8978_DAC_LIMITER_2, 4, 7, 0), 146 WM8978_DAC_LIMITER_2, 4, 7, 0),
147 SOC_SINGLE("DAC Playback Limiter Boost", 147 SOC_SINGLE("DAC Playback Limiter Boost",
148 WM8978_DAC_LIMITER_2, 0, 15, 0), 148 WM8978_DAC_LIMITER_2, 0, 12, 0),
149 149
150 SOC_ENUM("ALC Enable Switch", alc1), 150 SOC_ENUM("ALC Enable Switch", alc1),
151 SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0), 151 SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0),
152 SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0), 152 SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0),
153 153
154 SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 7, 0), 154 SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 10, 0),
155 SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0), 155 SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0),
156 156
157 SOC_ENUM("ALC Capture Mode", alc3), 157 SOC_ENUM("ALC Capture Mode", alc3),
158 SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 15, 0), 158 SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 10, 0),
159 SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 15, 0), 159 SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 10, 0),
160 160
161 SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0), 161 SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0),
162 SOC_SINGLE("ALC Capture Noise Gate Threshold", 162 SOC_SINGLE("ALC Capture Noise Gate Threshold",
@@ -211,8 +211,10 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
211 WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1), 211 WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1),
212 212
213 /* DAC / ADC oversampling */ 213 /* DAC / ADC oversampling */
214 SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL, 8, 1, 0), 214 SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL,
215 SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL, 8, 1, 0), 215 5, 1, 0),
216 SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL,
217 5, 1, 0),
216}; 218};
217 219
218/* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */ 220/* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 37b8aa8a680f..c6c958ee5d59 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -107,6 +107,12 @@ struct wm8994_priv {
107 107
108 int revision; 108 int revision;
109 struct wm8994_pdata *pdata; 109 struct wm8994_pdata *pdata;
110
111 unsigned int aif1clk_enable:1;
112 unsigned int aif2clk_enable:1;
113
114 unsigned int aif1clk_disable:1;
115 unsigned int aif2clk_disable:1;
110}; 116};
111 117
112static int wm8994_readable(unsigned int reg) 118static int wm8994_readable(unsigned int reg)
@@ -1004,6 +1010,110 @@ static void wm8994_update_class_w(struct snd_soc_codec *codec)
1004 } 1010 }
1005} 1011}
1006 1012
1013static int late_enable_ev(struct snd_soc_dapm_widget *w,
1014 struct snd_kcontrol *kcontrol, int event)
1015{
1016 struct snd_soc_codec *codec = w->codec;
1017 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
1018
1019 switch (event) {
1020 case SND_SOC_DAPM_PRE_PMU:
1021 if (wm8994->aif1clk_enable) {
1022 snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1,
1023 WM8994_AIF1CLK_ENA_MASK,
1024 WM8994_AIF1CLK_ENA);
1025 wm8994->aif1clk_enable = 0;
1026 }
1027 if (wm8994->aif2clk_enable) {
1028 snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1,
1029 WM8994_AIF2CLK_ENA_MASK,
1030 WM8994_AIF2CLK_ENA);
1031 wm8994->aif2clk_enable = 0;
1032 }
1033 break;
1034 }
1035
1036 return 0;
1037}
1038
1039static int late_disable_ev(struct snd_soc_dapm_widget *w,
1040 struct snd_kcontrol *kcontrol, int event)
1041{
1042 struct snd_soc_codec *codec = w->codec;
1043 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
1044
1045 switch (event) {
1046 case SND_SOC_DAPM_POST_PMD:
1047 if (wm8994->aif1clk_disable) {
1048 snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1,
1049 WM8994_AIF1CLK_ENA_MASK, 0);
1050 wm8994->aif1clk_disable = 0;
1051 }
1052 if (wm8994->aif2clk_disable) {
1053 snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1,
1054 WM8994_AIF2CLK_ENA_MASK, 0);
1055 wm8994->aif2clk_disable = 0;
1056 }
1057 break;
1058 }
1059
1060 return 0;
1061}
1062
1063static int aif1clk_ev(struct snd_soc_dapm_widget *w,
1064 struct snd_kcontrol *kcontrol, int event)
1065{
1066 struct snd_soc_codec *codec = w->codec;
1067 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
1068
1069 switch (event) {
1070 case SND_SOC_DAPM_PRE_PMU:
1071 wm8994->aif1clk_enable = 1;
1072 break;
1073 case SND_SOC_DAPM_POST_PMD:
1074 wm8994->aif1clk_disable = 1;
1075 break;
1076 }
1077
1078 return 0;
1079}
1080
1081static int aif2clk_ev(struct snd_soc_dapm_widget *w,
1082 struct snd_kcontrol *kcontrol, int event)
1083{
1084 struct snd_soc_codec *codec = w->codec;
1085 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
1086
1087 switch (event) {
1088 case SND_SOC_DAPM_PRE_PMU:
1089 wm8994->aif2clk_enable = 1;
1090 break;
1091 case SND_SOC_DAPM_POST_PMD:
1092 wm8994->aif2clk_disable = 1;
1093 break;
1094 }
1095
1096 return 0;
1097}
1098
1099static int adc_mux_ev(struct snd_soc_dapm_widget *w,
1100 struct snd_kcontrol *kcontrol, int event)
1101{
1102 late_enable_ev(w, kcontrol, event);
1103 return 0;
1104}
1105
1106static int dac_ev(struct snd_soc_dapm_widget *w,
1107 struct snd_kcontrol *kcontrol, int event)
1108{
1109 struct snd_soc_codec *codec = w->codec;
1110 unsigned int mask = 1 << w->shift;
1111
1112 snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5,
1113 mask, mask);
1114 return 0;
1115}
1116
1007static const char *hp_mux_text[] = { 1117static const char *hp_mux_text[] = {
1008 "Mixer", 1118 "Mixer",
1009 "DAC", 1119 "DAC",
@@ -1272,6 +1382,59 @@ static const struct soc_enum aif2dacr_src_enum =
1272static const struct snd_kcontrol_new aif2dacr_src_mux = 1382static const struct snd_kcontrol_new aif2dacr_src_mux =
1273 SOC_DAPM_ENUM("AIF2DACR Mux", aif2dacr_src_enum); 1383 SOC_DAPM_ENUM("AIF2DACR Mux", aif2dacr_src_enum);
1274 1384
1385static const struct snd_soc_dapm_widget wm8994_lateclk_revd_widgets[] = {
1386SND_SOC_DAPM_SUPPLY("AIF1CLK", SND_SOC_NOPM, 0, 0, aif1clk_ev,
1387 SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
1388SND_SOC_DAPM_SUPPLY("AIF2CLK", SND_SOC_NOPM, 0, 0, aif2clk_ev,
1389 SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
1390
1391SND_SOC_DAPM_PGA_E("Late DAC1L Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
1392 late_enable_ev, SND_SOC_DAPM_PRE_PMU),
1393SND_SOC_DAPM_PGA_E("Late DAC1R Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
1394 late_enable_ev, SND_SOC_DAPM_PRE_PMU),
1395SND_SOC_DAPM_PGA_E("Late DAC2L Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
1396 late_enable_ev, SND_SOC_DAPM_PRE_PMU),
1397SND_SOC_DAPM_PGA_E("Late DAC2R Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
1398 late_enable_ev, SND_SOC_DAPM_PRE_PMU),
1399
1400SND_SOC_DAPM_POST("Late Disable PGA", late_disable_ev)
1401};
1402
1403static const struct snd_soc_dapm_widget wm8994_lateclk_widgets[] = {
1404SND_SOC_DAPM_SUPPLY("AIF1CLK", WM8994_AIF1_CLOCKING_1, 0, 0, NULL, 0),
1405SND_SOC_DAPM_SUPPLY("AIF2CLK", WM8994_AIF2_CLOCKING_1, 0, 0, NULL, 0)
1406};
1407
1408static const struct snd_soc_dapm_widget wm8994_dac_revd_widgets[] = {
1409SND_SOC_DAPM_DAC_E("DAC2L", NULL, SND_SOC_NOPM, 3, 0,
1410 dac_ev, SND_SOC_DAPM_PRE_PMU),
1411SND_SOC_DAPM_DAC_E("DAC2R", NULL, SND_SOC_NOPM, 2, 0,
1412 dac_ev, SND_SOC_DAPM_PRE_PMU),
1413SND_SOC_DAPM_DAC_E("DAC1L", NULL, SND_SOC_NOPM, 1, 0,
1414 dac_ev, SND_SOC_DAPM_PRE_PMU),
1415SND_SOC_DAPM_DAC_E("DAC1R", NULL, SND_SOC_NOPM, 0, 0,
1416 dac_ev, SND_SOC_DAPM_PRE_PMU),
1417};
1418
1419static const struct snd_soc_dapm_widget wm8994_dac_widgets[] = {
1420SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0),
1421SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
1422SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0),
1423SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0),
1424};
1425
1426static const struct snd_soc_dapm_widget wm8994_adc_revd_widgets[] = {
1427SND_SOC_DAPM_MUX_E("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux,
1428 adc_mux_ev, SND_SOC_DAPM_PRE_PMU),
1429SND_SOC_DAPM_MUX_E("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux,
1430 adc_mux_ev, SND_SOC_DAPM_PRE_PMU),
1431};
1432
1433static const struct snd_soc_dapm_widget wm8994_adc_widgets[] = {
1434SND_SOC_DAPM_MUX("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux),
1435SND_SOC_DAPM_MUX("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux),
1436};
1437
1275static const struct snd_soc_dapm_widget wm8994_dapm_widgets[] = { 1438static const struct snd_soc_dapm_widget wm8994_dapm_widgets[] = {
1276SND_SOC_DAPM_INPUT("DMIC1DAT"), 1439SND_SOC_DAPM_INPUT("DMIC1DAT"),
1277SND_SOC_DAPM_INPUT("DMIC2DAT"), 1440SND_SOC_DAPM_INPUT("DMIC2DAT"),
@@ -1284,9 +1447,6 @@ SND_SOC_DAPM_SUPPLY("DSP1CLK", WM8994_CLOCKING_1, 3, 0, NULL, 0),
1284SND_SOC_DAPM_SUPPLY("DSP2CLK", WM8994_CLOCKING_1, 2, 0, NULL, 0), 1447SND_SOC_DAPM_SUPPLY("DSP2CLK", WM8994_CLOCKING_1, 2, 0, NULL, 0),
1285SND_SOC_DAPM_SUPPLY("DSPINTCLK", WM8994_CLOCKING_1, 1, 0, NULL, 0), 1448SND_SOC_DAPM_SUPPLY("DSPINTCLK", WM8994_CLOCKING_1, 1, 0, NULL, 0),
1286 1449
1287SND_SOC_DAPM_SUPPLY("AIF1CLK", WM8994_AIF1_CLOCKING_1, 0, 0, NULL, 0),
1288SND_SOC_DAPM_SUPPLY("AIF2CLK", WM8994_AIF2_CLOCKING_1, 0, 0, NULL, 0),
1289
1290SND_SOC_DAPM_AIF_OUT("AIF1ADC1L", NULL, 1450SND_SOC_DAPM_AIF_OUT("AIF1ADC1L", NULL,
1291 0, WM8994_POWER_MANAGEMENT_4, 9, 0), 1451 0, WM8994_POWER_MANAGEMENT_4, 9, 0),
1292SND_SOC_DAPM_AIF_OUT("AIF1ADC1R", NULL, 1452SND_SOC_DAPM_AIF_OUT("AIF1ADC1R", NULL,
@@ -1369,14 +1529,6 @@ SND_SOC_DAPM_ADC("DMIC1R", NULL, WM8994_POWER_MANAGEMENT_4, 2, 0),
1369SND_SOC_DAPM_ADC("ADCL", NULL, SND_SOC_NOPM, 1, 0), 1529SND_SOC_DAPM_ADC("ADCL", NULL, SND_SOC_NOPM, 1, 0),
1370SND_SOC_DAPM_ADC("ADCR", NULL, SND_SOC_NOPM, 0, 0), 1530SND_SOC_DAPM_ADC("ADCR", NULL, SND_SOC_NOPM, 0, 0),
1371 1531
1372SND_SOC_DAPM_MUX("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux),
1373SND_SOC_DAPM_MUX("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux),
1374
1375SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0),
1376SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
1377SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0),
1378SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0),
1379
1380SND_SOC_DAPM_MUX("Left Headphone Mux", SND_SOC_NOPM, 0, 0, &hpl_mux), 1532SND_SOC_DAPM_MUX("Left Headphone Mux", SND_SOC_NOPM, 0, 0, &hpl_mux),
1381SND_SOC_DAPM_MUX("Right Headphone Mux", SND_SOC_NOPM, 0, 0, &hpr_mux), 1533SND_SOC_DAPM_MUX("Right Headphone Mux", SND_SOC_NOPM, 0, 0, &hpr_mux),
1382 1534
@@ -1516,14 +1668,12 @@ static const struct snd_soc_dapm_route intercon[] = {
1516 { "AIF2ADC Mux", "AIF3DACDAT", "AIF3ADCDAT" }, 1668 { "AIF2ADC Mux", "AIF3DACDAT", "AIF3ADCDAT" },
1517 1669
1518 /* DAC1 inputs */ 1670 /* DAC1 inputs */
1519 { "DAC1L", NULL, "DAC1L Mixer" },
1520 { "DAC1L Mixer", "AIF2 Switch", "AIF2DACL" }, 1671 { "DAC1L Mixer", "AIF2 Switch", "AIF2DACL" },
1521 { "DAC1L Mixer", "AIF1.2 Switch", "AIF1DAC2L" }, 1672 { "DAC1L Mixer", "AIF1.2 Switch", "AIF1DAC2L" },
1522 { "DAC1L Mixer", "AIF1.1 Switch", "AIF1DAC1L" }, 1673 { "DAC1L Mixer", "AIF1.1 Switch", "AIF1DAC1L" },
1523 { "DAC1L Mixer", "Left Sidetone Switch", "Left Sidetone" }, 1674 { "DAC1L Mixer", "Left Sidetone Switch", "Left Sidetone" },
1524 { "DAC1L Mixer", "Right Sidetone Switch", "Right Sidetone" }, 1675 { "DAC1L Mixer", "Right Sidetone Switch", "Right Sidetone" },
1525 1676
1526 { "DAC1R", NULL, "DAC1R Mixer" },
1527 { "DAC1R Mixer", "AIF2 Switch", "AIF2DACR" }, 1677 { "DAC1R Mixer", "AIF2 Switch", "AIF2DACR" },
1528 { "DAC1R Mixer", "AIF1.2 Switch", "AIF1DAC2R" }, 1678 { "DAC1R Mixer", "AIF1.2 Switch", "AIF1DAC2R" },
1529 { "DAC1R Mixer", "AIF1.1 Switch", "AIF1DAC1R" }, 1679 { "DAC1R Mixer", "AIF1.1 Switch", "AIF1DAC1R" },
@@ -1532,7 +1682,6 @@ static const struct snd_soc_dapm_route intercon[] = {
1532 1682
1533 /* DAC2/AIF2 outputs */ 1683 /* DAC2/AIF2 outputs */
1534 { "AIF2ADCL", NULL, "AIF2DAC2L Mixer" }, 1684 { "AIF2ADCL", NULL, "AIF2DAC2L Mixer" },
1535 { "DAC2L", NULL, "AIF2DAC2L Mixer" },
1536 { "AIF2DAC2L Mixer", "AIF2 Switch", "AIF2DACL" }, 1685 { "AIF2DAC2L Mixer", "AIF2 Switch", "AIF2DACL" },
1537 { "AIF2DAC2L Mixer", "AIF1.2 Switch", "AIF1DAC2L" }, 1686 { "AIF2DAC2L Mixer", "AIF1.2 Switch", "AIF1DAC2L" },
1538 { "AIF2DAC2L Mixer", "AIF1.1 Switch", "AIF1DAC1L" }, 1687 { "AIF2DAC2L Mixer", "AIF1.1 Switch", "AIF1DAC1L" },
@@ -1540,7 +1689,6 @@ static const struct snd_soc_dapm_route intercon[] = {
1540 { "AIF2DAC2L Mixer", "Right Sidetone Switch", "Right Sidetone" }, 1689 { "AIF2DAC2L Mixer", "Right Sidetone Switch", "Right Sidetone" },
1541 1690
1542 { "AIF2ADCR", NULL, "AIF2DAC2R Mixer" }, 1691 { "AIF2ADCR", NULL, "AIF2DAC2R Mixer" },
1543 { "DAC2R", NULL, "AIF2DAC2R Mixer" },
1544 { "AIF2DAC2R Mixer", "AIF2 Switch", "AIF2DACR" }, 1692 { "AIF2DAC2R Mixer", "AIF2 Switch", "AIF2DACR" },
1545 { "AIF2DAC2R Mixer", "AIF1.2 Switch", "AIF1DAC2R" }, 1693 { "AIF2DAC2R Mixer", "AIF1.2 Switch", "AIF1DAC2R" },
1546 { "AIF2DAC2R Mixer", "AIF1.1 Switch", "AIF1DAC1R" }, 1694 { "AIF2DAC2R Mixer", "AIF1.1 Switch", "AIF1DAC1R" },
@@ -1584,6 +1732,24 @@ static const struct snd_soc_dapm_route intercon[] = {
1584 { "Right Headphone Mux", "DAC", "DAC1R" }, 1732 { "Right Headphone Mux", "DAC", "DAC1R" },
1585}; 1733};
1586 1734
1735static const struct snd_soc_dapm_route wm8994_lateclk_revd_intercon[] = {
1736 { "DAC1L", NULL, "Late DAC1L Enable PGA" },
1737 { "Late DAC1L Enable PGA", NULL, "DAC1L Mixer" },
1738 { "DAC1R", NULL, "Late DAC1R Enable PGA" },
1739 { "Late DAC1R Enable PGA", NULL, "DAC1R Mixer" },
1740 { "DAC2L", NULL, "Late DAC2L Enable PGA" },
1741 { "Late DAC2L Enable PGA", NULL, "AIF2DAC2L Mixer" },
1742 { "DAC2R", NULL, "Late DAC2R Enable PGA" },
1743 { "Late DAC2R Enable PGA", NULL, "AIF2DAC2R Mixer" }
1744};
1745
1746static const struct snd_soc_dapm_route wm8994_lateclk_intercon[] = {
1747 { "DAC1L", NULL, "DAC1L Mixer" },
1748 { "DAC1R", NULL, "DAC1R Mixer" },
1749 { "DAC2L", NULL, "AIF2DAC2L Mixer" },
1750 { "DAC2R", NULL, "AIF2DAC2R Mixer" },
1751};
1752
1587static const struct snd_soc_dapm_route wm8994_revd_intercon[] = { 1753static const struct snd_soc_dapm_route wm8994_revd_intercon[] = {
1588 { "AIF1DACDAT", NULL, "AIF2DACDAT" }, 1754 { "AIF1DACDAT", NULL, "AIF2DACDAT" },
1589 { "AIF2DACDAT", NULL, "AIF1DACDAT" }, 1755 { "AIF2DACDAT", NULL, "AIF1DACDAT" },
@@ -2514,6 +2680,22 @@ static int wm8994_resume(struct snd_soc_codec *codec)
2514{ 2680{
2515 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); 2681 struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
2516 int i, ret; 2682 int i, ret;
2683 unsigned int val, mask;
2684
2685 if (wm8994->revision < 4) {
2686 /* force a HW read */
2687 val = wm8994_reg_read(codec->control_data,
2688 WM8994_POWER_MANAGEMENT_5);
2689
2690 /* modify the cache only */
2691 codec->cache_only = 1;
2692 mask = WM8994_DAC1R_ENA | WM8994_DAC1L_ENA |
2693 WM8994_DAC2R_ENA | WM8994_DAC2L_ENA;
2694 val &= mask;
2695 snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5,
2696 mask, val);
2697 codec->cache_only = 0;
2698 }
2517 2699
2518 /* Restore the registers */ 2700 /* Restore the registers */
2519 ret = snd_soc_cache_sync(codec); 2701 ret = snd_soc_cache_sync(codec);
@@ -2847,11 +3029,10 @@ static void wm8958_default_micdet(u16 status, void *data)
2847 report |= SND_JACK_BTN_5; 3029 report |= SND_JACK_BTN_5;
2848 3030
2849done: 3031done:
2850 snd_soc_jack_report(wm8994->micdet[0].jack, 3032 snd_soc_jack_report(wm8994->micdet[0].jack, report,
2851 SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | 3033 SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 |
2852 SND_JACK_BTN_3 | SND_JACK_BTN_4 | SND_JACK_BTN_5 | 3034 SND_JACK_BTN_3 | SND_JACK_BTN_4 | SND_JACK_BTN_5 |
2853 SND_JACK_MICROPHONE | SND_JACK_VIDEOOUT, 3035 SND_JACK_MICROPHONE | SND_JACK_VIDEOOUT);
2854 report);
2855} 3036}
2856 3037
2857/** 3038/**
@@ -3125,10 +3306,31 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
3125 case WM8994: 3306 case WM8994:
3126 snd_soc_dapm_new_controls(dapm, wm8994_specific_dapm_widgets, 3307 snd_soc_dapm_new_controls(dapm, wm8994_specific_dapm_widgets,
3127 ARRAY_SIZE(wm8994_specific_dapm_widgets)); 3308 ARRAY_SIZE(wm8994_specific_dapm_widgets));
3309 if (wm8994->revision < 4) {
3310 snd_soc_dapm_new_controls(dapm, wm8994_lateclk_revd_widgets,
3311 ARRAY_SIZE(wm8994_lateclk_revd_widgets));
3312 snd_soc_dapm_new_controls(dapm, wm8994_adc_revd_widgets,
3313 ARRAY_SIZE(wm8994_adc_revd_widgets));
3314 snd_soc_dapm_new_controls(dapm, wm8994_dac_revd_widgets,
3315 ARRAY_SIZE(wm8994_dac_revd_widgets));
3316 } else {
3317 snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets,
3318 ARRAY_SIZE(wm8994_lateclk_widgets));
3319 snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets,
3320 ARRAY_SIZE(wm8994_adc_widgets));
3321 snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets,
3322 ARRAY_SIZE(wm8994_dac_widgets));
3323 }
3128 break; 3324 break;
3129 case WM8958: 3325 case WM8958:
3130 snd_soc_add_controls(codec, wm8958_snd_controls, 3326 snd_soc_add_controls(codec, wm8958_snd_controls,
3131 ARRAY_SIZE(wm8958_snd_controls)); 3327 ARRAY_SIZE(wm8958_snd_controls));
3328 snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets,
3329 ARRAY_SIZE(wm8994_lateclk_widgets));
3330 snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets,
3331 ARRAY_SIZE(wm8994_adc_widgets));
3332 snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets,
3333 ARRAY_SIZE(wm8994_dac_widgets));
3132 snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets, 3334 snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets,
3133 ARRAY_SIZE(wm8958_dapm_widgets)); 3335 ARRAY_SIZE(wm8958_dapm_widgets));
3134 break; 3336 break;
@@ -3143,12 +3345,19 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
3143 snd_soc_dapm_add_routes(dapm, wm8994_intercon, 3345 snd_soc_dapm_add_routes(dapm, wm8994_intercon,
3144 ARRAY_SIZE(wm8994_intercon)); 3346 ARRAY_SIZE(wm8994_intercon));
3145 3347
3146 if (wm8994->revision < 4) 3348 if (wm8994->revision < 4) {
3147 snd_soc_dapm_add_routes(dapm, wm8994_revd_intercon, 3349 snd_soc_dapm_add_routes(dapm, wm8994_revd_intercon,
3148 ARRAY_SIZE(wm8994_revd_intercon)); 3350 ARRAY_SIZE(wm8994_revd_intercon));
3149 3351 snd_soc_dapm_add_routes(dapm, wm8994_lateclk_revd_intercon,
3352 ARRAY_SIZE(wm8994_lateclk_revd_intercon));
3353 } else {
3354 snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon,
3355 ARRAY_SIZE(wm8994_lateclk_intercon));
3356 }
3150 break; 3357 break;
3151 case WM8958: 3358 case WM8958:
3359 snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon,
3360 ARRAY_SIZE(wm8994_lateclk_intercon));
3152 snd_soc_dapm_add_routes(dapm, wm8958_intercon, 3361 snd_soc_dapm_add_routes(dapm, wm8958_intercon,
3153 ARRAY_SIZE(wm8958_intercon)); 3362 ARRAY_SIZE(wm8958_intercon));
3154 break; 3363 break;
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index 43825b2102a5..cce704c275c6 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c
@@ -15,6 +15,7 @@
15#include <linux/moduleparam.h> 15#include <linux/moduleparam.h>
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/device.h>
18#include <linux/pm.h> 19#include <linux/pm.h>
19#include <linux/i2c.h> 20#include <linux/i2c.h>
20#include <linux/platform_device.h> 21#include <linux/platform_device.h>
@@ -1341,6 +1342,10 @@ static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
1341 wm9081->control_type = SND_SOC_I2C; 1342 wm9081->control_type = SND_SOC_I2C;
1342 wm9081->control_data = i2c; 1343 wm9081->control_data = i2c;
1343 1344
1345 if (dev_get_platdata(&i2c->dev))
1346 memcpy(&wm9081->retune, dev_get_platdata(&i2c->dev),
1347 sizeof(wm9081->retune));
1348
1344 ret = snd_soc_register_codec(&i2c->dev, 1349 ret = snd_soc_register_codec(&i2c->dev,
1345 &soc_codec_dev_wm9081, &wm9081_dai, 1); 1350 &soc_codec_dev_wm9081, &wm9081_dai, 1);
1346 if (ret < 0) 1351 if (ret < 0)
diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c
index 613df5db0b32..516892706063 100644
--- a/sound/soc/codecs/wm_hubs.c
+++ b/sound/soc/codecs/wm_hubs.c
@@ -674,6 +674,9 @@ SND_SOC_DAPM_OUTPUT("LINEOUT2N"),
674}; 674};
675 675
676static const struct snd_soc_dapm_route analogue_routes[] = { 676static const struct snd_soc_dapm_route analogue_routes[] = {
677 { "MICBIAS1", NULL, "CLK_SYS" },
678 { "MICBIAS2", NULL, "CLK_SYS" },
679
677 { "IN1L PGA", "IN1LP Switch", "IN1LP" }, 680 { "IN1L PGA", "IN1LP Switch", "IN1LP" },
678 { "IN1L PGA", "IN1LN Switch", "IN1LN" }, 681 { "IN1L PGA", "IN1LN Switch", "IN1LN" },
679 682
diff --git a/sound/soc/imx/eukrea-tlv320.c b/sound/soc/imx/eukrea-tlv320.c
index e20c9e1457c0..1e9bccae4e80 100644
--- a/sound/soc/imx/eukrea-tlv320.c
+++ b/sound/soc/imx/eukrea-tlv320.c
@@ -79,7 +79,7 @@ static struct snd_soc_dai_link eukrea_tlv320_dai = {
79 .name = "tlv320aic23", 79 .name = "tlv320aic23",
80 .stream_name = "TLV320AIC23", 80 .stream_name = "TLV320AIC23",
81 .codec_dai_name = "tlv320aic23-hifi", 81 .codec_dai_name = "tlv320aic23-hifi",
82 .platform_name = "imx-pcm-audio.0", 82 .platform_name = "imx-fiq-pcm-audio.0",
83 .codec_name = "tlv320aic23-codec.0-001a", 83 .codec_name = "tlv320aic23-codec.0-001a",
84 .cpu_dai_name = "imx-ssi.0", 84 .cpu_dai_name = "imx-ssi.0",
85 .ops = &eukrea_tlv320_snd_ops, 85 .ops = &eukrea_tlv320_snd_ops,
diff --git a/sound/soc/omap/am3517evm.c b/sound/soc/omap/am3517evm.c
index 161750443ebc..73dde4a1adc3 100644
--- a/sound/soc/omap/am3517evm.c
+++ b/sound/soc/omap/am3517evm.c
@@ -139,7 +139,7 @@ static struct snd_soc_dai_link am3517evm_dai = {
139 .cpu_dai_name ="omap-mcbsp-dai.0", 139 .cpu_dai_name ="omap-mcbsp-dai.0",
140 .codec_dai_name = "tlv320aic23-hifi", 140 .codec_dai_name = "tlv320aic23-hifi",
141 .platform_name = "omap-pcm-audio", 141 .platform_name = "omap-pcm-audio",
142 .codec_name = "tlv320aic23-codec", 142 .codec_name = "tlv320aic23-codec.2-001a",
143 .init = am3517evm_aic23_init, 143 .init = am3517evm_aic23_init,
144 .ops = &am3517evm_ops, 144 .ops = &am3517evm_ops,
145}; 145};
diff --git a/sound/soc/pxa/e740_wm9705.c b/sound/soc/pxa/e740_wm9705.c
index 28333e7d9c50..dc65650a6fa1 100644
--- a/sound/soc/pxa/e740_wm9705.c
+++ b/sound/soc/pxa/e740_wm9705.c
@@ -117,7 +117,7 @@ static struct snd_soc_dai_link e740_dai[] = {
117 { 117 {
118 .name = "AC97", 118 .name = "AC97",
119 .stream_name = "AC97 HiFi", 119 .stream_name = "AC97 HiFi",
120 .cpu_dai_name = "pxa-ac97.0", 120 .cpu_dai_name = "pxa2xx-ac97",
121 .codec_dai_name = "wm9705-hifi", 121 .codec_dai_name = "wm9705-hifi",
122 .platform_name = "pxa-pcm-audio", 122 .platform_name = "pxa-pcm-audio",
123 .codec_name = "wm9705-codec", 123 .codec_name = "wm9705-codec",
@@ -126,7 +126,7 @@ static struct snd_soc_dai_link e740_dai[] = {
126 { 126 {
127 .name = "AC97 Aux", 127 .name = "AC97 Aux",
128 .stream_name = "AC97 Aux", 128 .stream_name = "AC97 Aux",
129 .cpu_dai_name = "pxa-ac97.1", 129 .cpu_dai_name = "pxa2xx-ac97-aux",
130 .codec_dai_name = "wm9705-aux", 130 .codec_dai_name = "wm9705-aux",
131 .platform_name = "pxa-pcm-audio", 131 .platform_name = "pxa-pcm-audio",
132 .codec_name = "wm9705-codec", 132 .codec_name = "wm9705-codec",
diff --git a/sound/soc/pxa/e750_wm9705.c b/sound/soc/pxa/e750_wm9705.c
index 01bf31675c55..51897fcd911b 100644
--- a/sound/soc/pxa/e750_wm9705.c
+++ b/sound/soc/pxa/e750_wm9705.c
@@ -99,7 +99,7 @@ static struct snd_soc_dai_link e750_dai[] = {
99 { 99 {
100 .name = "AC97", 100 .name = "AC97",
101 .stream_name = "AC97 HiFi", 101 .stream_name = "AC97 HiFi",
102 .cpu_dai_name = "pxa-ac97.0", 102 .cpu_dai_name = "pxa2xx-ac97",
103 .codec_dai_name = "wm9705-hifi", 103 .codec_dai_name = "wm9705-hifi",
104 .platform_name = "pxa-pcm-audio", 104 .platform_name = "pxa-pcm-audio",
105 .codec_name = "wm9705-codec", 105 .codec_name = "wm9705-codec",
@@ -109,7 +109,7 @@ static struct snd_soc_dai_link e750_dai[] = {
109 { 109 {
110 .name = "AC97 Aux", 110 .name = "AC97 Aux",
111 .stream_name = "AC97 Aux", 111 .stream_name = "AC97 Aux",
112 .cpu_dai_name = "pxa-ac97.1", 112 .cpu_dai_name = "pxa2xx-ac97-aux",
113 .codec_dai_name ="wm9705-aux", 113 .codec_dai_name ="wm9705-aux",
114 .platform_name = "pxa-pcm-audio", 114 .platform_name = "pxa-pcm-audio",
115 .codec_name = "wm9705-codec", 115 .codec_name = "wm9705-codec",
diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c
index c6a37c6ef23b..053ed208e59f 100644
--- a/sound/soc/pxa/e800_wm9712.c
+++ b/sound/soc/pxa/e800_wm9712.c
@@ -89,7 +89,7 @@ static struct snd_soc_dai_link e800_dai[] = {
89 { 89 {
90 .name = "AC97", 90 .name = "AC97",
91 .stream_name = "AC97 HiFi", 91 .stream_name = "AC97 HiFi",
92 .cpu_dai_name = "pxa-ac97.0", 92 .cpu_dai_name = "pxa2xx-ac97",
93 .codec_dai_name = "wm9712-hifi", 93 .codec_dai_name = "wm9712-hifi",
94 .platform_name = "pxa-pcm-audio", 94 .platform_name = "pxa-pcm-audio",
95 .codec_name = "wm9712-codec", 95 .codec_name = "wm9712-codec",
@@ -98,7 +98,7 @@ static struct snd_soc_dai_link e800_dai[] = {
98 { 98 {
99 .name = "AC97 Aux", 99 .name = "AC97 Aux",
100 .stream_name = "AC97 Aux", 100 .stream_name = "AC97 Aux",
101 .cpu_dai_name = "pxa-ac97.1", 101 .cpu_dai_name = "pxa2xx-ac97-aux",
102 .codec_dai_name ="wm9712-aux", 102 .codec_dai_name ="wm9712-aux",
103 .platform_name = "pxa-pcm-audio", 103 .platform_name = "pxa-pcm-audio",
104 .codec_name = "wm9712-codec", 104 .codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c
index fc22e6eefc98..b13a4252812d 100644
--- a/sound/soc/pxa/em-x270.c
+++ b/sound/soc/pxa/em-x270.c
@@ -37,7 +37,7 @@ static struct snd_soc_dai_link em_x270_dai[] = {
37 { 37 {
38 .name = "AC97", 38 .name = "AC97",
39 .stream_name = "AC97 HiFi", 39 .stream_name = "AC97 HiFi",
40 .cpu_dai_name = "pxa-ac97.0", 40 .cpu_dai_name = "pxa2xx-ac97",
41 .codec_dai_name = "wm9712-hifi", 41 .codec_dai_name = "wm9712-hifi",
42 .platform_name = "pxa-pcm-audio", 42 .platform_name = "pxa-pcm-audio",
43 .codec_name = "wm9712-codec", 43 .codec_name = "wm9712-codec",
@@ -45,7 +45,7 @@ static struct snd_soc_dai_link em_x270_dai[] = {
45 { 45 {
46 .name = "AC97 Aux", 46 .name = "AC97 Aux",
47 .stream_name = "AC97 Aux", 47 .stream_name = "AC97 Aux",
48 .cpu_dai_name = "pxa-ac97.1", 48 .cpu_dai_name = "pxa2xx-ac97-aux",
49 .codec_dai_name ="wm9712-aux", 49 .codec_dai_name ="wm9712-aux",
50 .platform_name = "pxa-pcm-audio", 50 .platform_name = "pxa-pcm-audio",
51 .codec_name = "wm9712-codec", 51 .codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/mioa701_wm9713.c b/sound/soc/pxa/mioa701_wm9713.c
index 0d70fc8c12bd..38ca6759907e 100644
--- a/sound/soc/pxa/mioa701_wm9713.c
+++ b/sound/soc/pxa/mioa701_wm9713.c
@@ -162,7 +162,7 @@ static struct snd_soc_dai_link mioa701_dai[] = {
162 { 162 {
163 .name = "AC97", 163 .name = "AC97",
164 .stream_name = "AC97 HiFi", 164 .stream_name = "AC97 HiFi",
165 .cpu_dai_name = "pxa-ac97.0", 165 .cpu_dai_name = "pxa2xx-ac97",
166 .codec_dai_name = "wm9713-hifi", 166 .codec_dai_name = "wm9713-hifi",
167 .codec_name = "wm9713-codec", 167 .codec_name = "wm9713-codec",
168 .init = mioa701_wm9713_init, 168 .init = mioa701_wm9713_init,
@@ -172,7 +172,7 @@ static struct snd_soc_dai_link mioa701_dai[] = {
172 { 172 {
173 .name = "AC97 Aux", 173 .name = "AC97 Aux",
174 .stream_name = "AC97 Aux", 174 .stream_name = "AC97 Aux",
175 .cpu_dai_name = "pxa-ac97.1", 175 .cpu_dai_name = "pxa2xx-ac97-aux",
176 .codec_dai_name ="wm9713-aux", 176 .codec_dai_name ="wm9713-aux",
177 .codec_name = "wm9713-codec", 177 .codec_name = "wm9713-codec",
178 .platform_name = "pxa-pcm-audio", 178 .platform_name = "pxa-pcm-audio",
diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c
index 857db96d4a4f..504e4004f004 100644
--- a/sound/soc/pxa/palm27x.c
+++ b/sound/soc/pxa/palm27x.c
@@ -132,7 +132,7 @@ static struct snd_soc_dai_link palm27x_dai[] = {
132{ 132{
133 .name = "AC97 HiFi", 133 .name = "AC97 HiFi",
134 .stream_name = "AC97 HiFi", 134 .stream_name = "AC97 HiFi",
135 .cpu_dai_name = "pxa-ac97.0", 135 .cpu_dai_name = "pxa2xx-ac97",
136 .codec_dai_name = "wm9712-hifi", 136 .codec_dai_name = "wm9712-hifi",
137 .codec_name = "wm9712-codec", 137 .codec_name = "wm9712-codec",
138 .platform_name = "pxa-pcm-audio", 138 .platform_name = "pxa-pcm-audio",
@@ -141,7 +141,7 @@ static struct snd_soc_dai_link palm27x_dai[] = {
141{ 141{
142 .name = "AC97 Aux", 142 .name = "AC97 Aux",
143 .stream_name = "AC97 Aux", 143 .stream_name = "AC97 Aux",
144 .cpu_dai_name = "pxa-ac97.1", 144 .cpu_dai_name = "pxa2xx-ac97-aux",
145 .codec_dai_name = "wm9712-aux", 145 .codec_dai_name = "wm9712-aux",
146 .codec_name = "wm9712-codec", 146 .codec_name = "wm9712-codec",
147 .platform_name = "pxa-pcm-audio", 147 .platform_name = "pxa-pcm-audio",
diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c
index f75804ef0897..4b6e5d608b42 100644
--- a/sound/soc/pxa/tosa.c
+++ b/sound/soc/pxa/tosa.c
@@ -219,7 +219,7 @@ static struct snd_soc_dai_link tosa_dai[] = {
219{ 219{
220 .name = "AC97", 220 .name = "AC97",
221 .stream_name = "AC97 HiFi", 221 .stream_name = "AC97 HiFi",
222 .cpu_dai_name = "pxa-ac97.0", 222 .cpu_dai_name = "pxa2xx-ac97",
223 .codec_dai_name = "wm9712-hifi", 223 .codec_dai_name = "wm9712-hifi",
224 .platform_name = "pxa-pcm-audio", 224 .platform_name = "pxa-pcm-audio",
225 .codec_name = "wm9712-codec", 225 .codec_name = "wm9712-codec",
@@ -229,7 +229,7 @@ static struct snd_soc_dai_link tosa_dai[] = {
229{ 229{
230 .name = "AC97 Aux", 230 .name = "AC97 Aux",
231 .stream_name = "AC97 Aux", 231 .stream_name = "AC97 Aux",
232 .cpu_dai_name = "pxa-ac97.1", 232 .cpu_dai_name = "pxa2xx-ac97-aux",
233 .codec_dai_name = "wm9712-aux", 233 .codec_dai_name = "wm9712-aux",
234 .platform_name = "pxa-pcm-audio", 234 .platform_name = "pxa-pcm-audio",
235 .codec_name = "wm9712-codec", 235 .codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c
index b222a7d72027..25bba108fea3 100644
--- a/sound/soc/pxa/zylonite.c
+++ b/sound/soc/pxa/zylonite.c
@@ -166,7 +166,7 @@ static struct snd_soc_dai_link zylonite_dai[] = {
166 .stream_name = "AC97 HiFi", 166 .stream_name = "AC97 HiFi",
167 .codec_name = "wm9713-codec", 167 .codec_name = "wm9713-codec",
168 .platform_name = "pxa-pcm-audio", 168 .platform_name = "pxa-pcm-audio",
169 .cpu_dai_name = "pxa-ac97.0", 169 .cpu_dai_name = "pxa2xx-ac97",
170 .codec_name = "wm9713-hifi", 170 .codec_name = "wm9713-hifi",
171 .init = zylonite_wm9713_init, 171 .init = zylonite_wm9713_init,
172}, 172},
@@ -175,7 +175,7 @@ static struct snd_soc_dai_link zylonite_dai[] = {
175 .stream_name = "AC97 Aux", 175 .stream_name = "AC97 Aux",
176 .codec_name = "wm9713-codec", 176 .codec_name = "wm9713-codec",
177 .platform_name = "pxa-pcm-audio", 177 .platform_name = "pxa-pcm-audio",
178 .cpu_dai_name = "pxa-ac97.1", 178 .cpu_dai_name = "pxa2xx-ac97-aux",
179 .codec_name = "wm9713-aux", 179 .codec_name = "wm9713-aux",
180}, 180},
181{ 181{
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 8194f150bab7..1790f83ee665 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -712,7 +712,15 @@ static int dapm_supply_check_power(struct snd_soc_dapm_widget *w)
712 !path->connected(path->source, path->sink)) 712 !path->connected(path->source, path->sink))
713 continue; 713 continue;
714 714
715 if (path->sink && path->sink->power_check && 715 if (!path->sink)
716 continue;
717
718 if (path->sink->force) {
719 power = 1;
720 break;
721 }
722
723 if (path->sink->power_check &&
716 path->sink->power_check(path->sink)) { 724 path->sink->power_check(path->sink)) {
717 power = 1; 725 power = 1;
718 break; 726 break;
@@ -933,7 +941,7 @@ static void dapm_seq_run(struct snd_soc_dapm_context *dapm,
933 } 941 }
934 942
935 if (!list_empty(&pending)) 943 if (!list_empty(&pending))
936 dapm_seq_run_coalesced(dapm, &pending); 944 dapm_seq_run_coalesced(cur_dapm, &pending);
937} 945}
938 946
939static void dapm_widget_update(struct snd_soc_dapm_context *dapm) 947static void dapm_widget_update(struct snd_soc_dapm_context *dapm)
@@ -1627,6 +1635,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_add_routes);
1627int snd_soc_dapm_new_widgets(struct snd_soc_dapm_context *dapm) 1635int snd_soc_dapm_new_widgets(struct snd_soc_dapm_context *dapm)
1628{ 1636{
1629 struct snd_soc_dapm_widget *w; 1637 struct snd_soc_dapm_widget *w;
1638 unsigned int val;
1630 1639
1631 list_for_each_entry(w, &dapm->card->widgets, list) 1640 list_for_each_entry(w, &dapm->card->widgets, list)
1632 { 1641 {
@@ -1675,6 +1684,18 @@ int snd_soc_dapm_new_widgets(struct snd_soc_dapm_context *dapm)
1675 case snd_soc_dapm_post: 1684 case snd_soc_dapm_post:
1676 break; 1685 break;
1677 } 1686 }
1687
1688 /* Read the initial power state from the device */
1689 if (w->reg >= 0) {
1690 val = snd_soc_read(w->codec, w->reg);
1691 val &= 1 << w->shift;
1692 if (w->invert)
1693 val = !val;
1694
1695 if (val)
1696 w->power = 1;
1697 }
1698
1678 w->new = 1; 1699 w->new = 1;
1679 } 1700 }
1680 1701
diff --git a/sound/usb/card.c b/sound/usb/card.c
index 800f7cb4f251..c0f8270bc199 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -323,6 +323,7 @@ static int snd_usb_audio_create(struct usb_device *dev, int idx,
323 return -ENOMEM; 323 return -ENOMEM;
324 } 324 }
325 325
326 mutex_init(&chip->shutdown_mutex);
326 chip->index = idx; 327 chip->index = idx;
327 chip->dev = dev; 328 chip->dev = dev;
328 chip->card = card; 329 chip->card = card;
@@ -531,6 +532,7 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, void *ptr)
531 chip = ptr; 532 chip = ptr;
532 card = chip->card; 533 card = chip->card;
533 mutex_lock(&register_mutex); 534 mutex_lock(&register_mutex);
535 mutex_lock(&chip->shutdown_mutex);
534 chip->shutdown = 1; 536 chip->shutdown = 1;
535 chip->num_interfaces--; 537 chip->num_interfaces--;
536 if (chip->num_interfaces <= 0) { 538 if (chip->num_interfaces <= 0) {
@@ -548,9 +550,11 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, void *ptr)
548 snd_usb_mixer_disconnect(p); 550 snd_usb_mixer_disconnect(p);
549 } 551 }
550 usb_chip[chip->index] = NULL; 552 usb_chip[chip->index] = NULL;
553 mutex_unlock(&chip->shutdown_mutex);
551 mutex_unlock(&register_mutex); 554 mutex_unlock(&register_mutex);
552 snd_card_free_when_closed(card); 555 snd_card_free_when_closed(card);
553 } else { 556 } else {
557 mutex_unlock(&chip->shutdown_mutex);
554 mutex_unlock(&register_mutex); 558 mutex_unlock(&register_mutex);
555 } 559 }
556} 560}
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 4132522ac90f..e3f680526cb5 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -361,6 +361,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
361 } 361 }
362 362
363 if (changed) { 363 if (changed) {
364 mutex_lock(&subs->stream->chip->shutdown_mutex);
364 /* format changed */ 365 /* format changed */
365 snd_usb_release_substream_urbs(subs, 0); 366 snd_usb_release_substream_urbs(subs, 0);
366 /* influenced: period_bytes, channels, rate, format, */ 367 /* influenced: period_bytes, channels, rate, format, */
@@ -368,6 +369,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
368 params_rate(hw_params), 369 params_rate(hw_params),
369 snd_pcm_format_physical_width(params_format(hw_params)) * 370 snd_pcm_format_physical_width(params_format(hw_params)) *
370 params_channels(hw_params)); 371 params_channels(hw_params));
372 mutex_unlock(&subs->stream->chip->shutdown_mutex);
371 } 373 }
372 374
373 return ret; 375 return ret;
@@ -385,8 +387,9 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream)
385 subs->cur_audiofmt = NULL; 387 subs->cur_audiofmt = NULL;
386 subs->cur_rate = 0; 388 subs->cur_rate = 0;
387 subs->period_bytes = 0; 389 subs->period_bytes = 0;
388 if (!subs->stream->chip->shutdown) 390 mutex_lock(&subs->stream->chip->shutdown_mutex);
389 snd_usb_release_substream_urbs(subs, 0); 391 snd_usb_release_substream_urbs(subs, 0);
392 mutex_unlock(&subs->stream->chip->shutdown_mutex);
390 return snd_pcm_lib_free_vmalloc_buffer(substream); 393 return snd_pcm_lib_free_vmalloc_buffer(substream);
391} 394}
392 395
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index db3eb21627ee..6e66fffe87f5 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -36,6 +36,7 @@ struct snd_usb_audio {
36 struct snd_card *card; 36 struct snd_card *card;
37 u32 usb_id; 37 u32 usb_id;
38 int shutdown; 38 int shutdown;
39 struct mutex shutdown_mutex;
39 unsigned int txfr_quirk:1; /* Subframe boundaries on transfers */ 40 unsigned int txfr_quirk:1; /* Subframe boundaries on transfers */
40 int num_interfaces; 41 int num_interfaces;
41 int num_suspended_intf; 42 int num_suspended_intf;
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index cb43289e447f..416684be0ad3 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -1,4 +1,3 @@
1PERF-BUILD-OPTIONS
2PERF-CFLAGS 1PERF-CFLAGS
3PERF-GUI-VARS 2PERF-GUI-VARS
4PERF-VERSION-FILE 3PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index bd498d496952..4626a398836a 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -178,8 +178,8 @@ install-pdf: pdf
178 $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) 178 $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
179 $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) 179 $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
180 180
181install-html: html 181#install-html: html
182 '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) 182# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
183 183
184../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE 184../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
185 $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE 185 $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE
@@ -288,15 +288,16 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
288 sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ 288 sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
289 mv $@+ $@ 289 mv $@+ $@
290 290
291install-webdoc : html 291# UNIMPLEMENTED
292 '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) 292#install-webdoc : html
293# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
293 294
294quick-install: quick-install-man 295# quick-install: quick-install-man
295 296
296quick-install-man: 297# quick-install-man:
297 '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir) 298# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
298 299
299quick-install-html: 300#quick-install-html:
300 '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) 301# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
301 302
302.PHONY: .FORCE-PERF-VERSION-FILE 303.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 399751befeed..7a527f7e9da9 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf list' 11'perf list' [hw|sw|cache|tracepoint|event_glob]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
@@ -63,7 +63,26 @@ details. Some of them are referenced in the SEE ALSO section below.
63 63
64OPTIONS 64OPTIONS
65------- 65-------
66None 66
67Without options all known events will be listed.
68
69To limit the list use:
70
71. 'hw' or 'hardware' to list hardware events such as cache-misses, etc.
72
73. 'sw' or 'software' to list software events such as context switches, etc.
74
75. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc.
76
77. 'tracepoint' to list all tracepoint events, alternatively use
78 'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
79 block, etc.
80
81. If none of the above is matched, it will apply the supplied glob to all
82 events, printing the ones that match.
83
84One or more types can be used at the same time, listing the events for the
85types specified.
67 86
68SEE ALSO 87SEE ALSO
69-------- 88--------
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 921de259ea10..4a26a2f3a6a3 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -24,8 +24,8 @@ and statistics with this 'perf lock' command.
24 24
25 'perf lock report' reports statistical data. 25 'perf lock report' reports statistical data.
26 26
27OPTIONS 27COMMON OPTIONS
28------- 28--------------
29 29
30-i:: 30-i::
31--input=<file>:: 31--input=<file>::
@@ -39,6 +39,14 @@ OPTIONS
39--dump-raw-trace:: 39--dump-raw-trace::
40 Dump raw trace in ASCII. 40 Dump raw trace in ASCII.
41 41
42REPORT OPTIONS
43--------------
44
45-k::
46--key=<value>::
47 Sorting key. Possible values: acquired (default), contended,
48 wait_total, wait_max, wait_min.
49
42SEE ALSO 50SEE ALSO
43-------- 51--------
44linkperf:perf[1] 52linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 86b797a35aa6..02bafce4b341 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -16,7 +16,7 @@ or
16or 16or
17'perf probe' --list 17'perf probe' --list
18or 18or
19'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' 19'perf probe' [options] --line='LINE'
20or 20or
21'perf probe' [options] --vars='PROBEPOINT' 21'perf probe' [options] --vars='PROBEPOINT'
22 22
@@ -73,6 +73,17 @@ OPTIONS
73 (Only for --vars) Show external defined variables in addition to local 73 (Only for --vars) Show external defined variables in addition to local
74 variables. 74 variables.
75 75
76-F::
77--funcs::
78 Show available functions in given module or kernel.
79
80--filter=FILTER::
81 (Only for --vars and --funcs) Set filter. FILTER is a combination of glob
82 pattern, see FILTER PATTERN for detail.
83 Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*"
84 for --funcs.
85 If several filters are specified, only the last filter is used.
86
76-f:: 87-f::
77--force:: 88--force::
78 Forcibly add events with existing name. 89 Forcibly add events with existing name.
@@ -117,13 +128,14 @@ LINE SYNTAX
117----------- 128-----------
118Line range is described by following syntax. 129Line range is described by following syntax.
119 130
120 "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]" 131 "FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
121 132
122FUNC specifies the function name of showing lines. 'RLN' is the start line 133FUNC specifies the function name of showing lines. 'RLN' is the start line
123number from function entry line, and 'RLN2' is the end line number. As same as 134number from function entry line, and 'RLN2' is the end line number. As same as
124probe syntax, 'SRC' means the source file path, 'ALN' is start line number, 135probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
125and 'ALN2' is end line number in the file. It is also possible to specify how 136and 'ALN2' is end line number in the file. It is also possible to specify how
126many lines to show by using 'NUM'. 137many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good
138for searching a specific function when several functions share same name.
127So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. 139So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
128 140
129LAZY MATCHING 141LAZY MATCHING
@@ -135,6 +147,14 @@ e.g.
135 147
136This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.) 148This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.)
137 149
150FILTER PATTERN
151--------------
152 The filter pattern is a glob matching pattern(s) to filter variables.
153 In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")".
154
155e.g.
156 With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
157 With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out.
138 158
139EXAMPLES 159EXAMPLES
140-------- 160--------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e032716c839b..5a520f825295 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -137,6 +137,17 @@ Do not update the builid cache. This saves some overhead in situations
137where the information in the perf.data file (which includes buildids) 137where the information in the perf.data file (which includes buildids)
138is sufficient. 138is sufficient.
139 139
140-G name,...::
141--cgroup name,...::
142monitor only in the container (cgroup) called "name". This option is available only
143in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
144container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
145can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
146to first event, second cgroup to second event and so on. It is possible to provide
147an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
148corresponding events, i.e., they always refer to events defined earlier on the command
149line.
150
140SEE ALSO 151SEE ALSO
141-------- 152--------
142linkperf:perf-stat[1], linkperf:perf-list[1] 153linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b6da7affbbee..918cc38ee6d1 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -83,6 +83,17 @@ This option is only valid in system-wide mode.
83print counts using a CSV-style output to make it easy to import directly into 83print counts using a CSV-style output to make it easy to import directly into
84spreadsheets. Columns are separated by the string specified in SEP. 84spreadsheets. Columns are separated by the string specified in SEP.
85 85
86-G name::
87--cgroup name::
88monitor only in the container (cgroup) called "name". This option is available only
89in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
90container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
91can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
92to first event, second cgroup to second event and so on. It is possible to provide
93an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
94corresponding events, i.e., they always refer to events defined earlier on the command
95line.
96
86EXAMPLES 97EXAMPLES
87-------- 98--------
88 99
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7141c42e1469..9b8421805c5c 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -3,7 +3,7 @@ ifeq ("$(origin O)", "command line")
3endif 3endif
4 4
5# The default target of this Makefile is... 5# The default target of this Makefile is...
6all:: 6all:
7 7
8ifneq ($(OUTPUT),) 8ifneq ($(OUTPUT),)
9# check that the output directory actually exists 9# check that the output directory actually exists
@@ -11,152 +11,12 @@ OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
11$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) 11$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
12endif 12endif
13 13
14# Define V=1 to have a more verbose compile. 14# Define V to have a more verbose compile.
15# Define V=2 to have an even more verbose compile.
16#
17# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
18# or vsnprintf() return -1 instead of number of characters which would
19# have been written to the final string if enough space had been available.
20#
21# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds
22# when attempting to read from an fopen'ed directory.
23#
24# Define NO_OPENSSL environment variable if you do not have OpenSSL.
25# This also implies MOZILLA_SHA1.
26#
27# Define CURLDIR=/foo/bar if your curl header and library files are in
28# /foo/bar/include and /foo/bar/lib directories.
29#
30# Define EXPATDIR=/foo/bar if your expat header and library files are in
31# /foo/bar/include and /foo/bar/lib directories.
32#
33# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent.
34#
35# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks
36# d_type in struct dirent (latest Cygwin -- will be fixed soonish).
37#
38# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.)
39# do not support the 'size specifiers' introduced by C99, namely ll, hh,
40# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t).
41# some C compilers supported these specifiers prior to C99 as an extension.
42#
43# Define NO_STRCASESTR if you don't have strcasestr.
44#
45# Define NO_MEMMEM if you don't have memmem.
46#
47# Define NO_STRTOUMAX if you don't have strtoumax in the C library.
48# If your compiler also does not support long long or does not have
49# strtoull, define NO_STRTOULL.
50#
51# Define NO_SETENV if you don't have setenv in the C library.
52#
53# Define NO_UNSETENV if you don't have unsetenv in the C library.
54#
55# Define NO_MKDTEMP if you don't have mkdtemp in the C library.
56#
57# Define NO_SYS_SELECT_H if you don't have sys/select.h.
58#
59# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link.
60# Enable it on Windows. By default, symrefs are still used.
61#
62# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
63# tests. These tests take up a significant amount of the total test time
64# but are not needed unless you plan to talk to SVN repos.
65#
66# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
67# installed in /sw, but don't want PERF to link against any libraries
68# installed there. If defined you may specify your own (or Fink's)
69# include directories and library directories by defining CFLAGS
70# and LDFLAGS appropriately.
71#
72# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
73# have DarwinPorts installed in /opt/local, but don't want PERF to
74# link against any libraries installed there. If defined you may
75# specify your own (or DarwinPort's) include directories and
76# library directories by defining CFLAGS and LDFLAGS appropriately.
77#
78# Define PPC_SHA1 environment variable when running make to make use of
79# a bundled SHA1 routine optimized for PowerPC.
80#
81# Define ARM_SHA1 environment variable when running make to make use of
82# a bundled SHA1 routine optimized for ARM.
83#
84# Define MOZILLA_SHA1 environment variable when running make to make use of
85# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast
86# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
87# choice) has very fast version optimized for i586.
88#
89# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
90#
91# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
92#
93# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
94# Patrick Mauritz).
95#
96# Define NO_MMAP if you want to avoid mmap.
97#
98# Define NO_PTHREADS if you do not have or do not want to use Pthreads.
99#
100# Define NO_PREAD if you have a problem with pread() system call (e.g.
101# cygwin.dll before v1.5.22).
102#
103# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
104# generally faster on your platform than accessing the working directory.
105#
106# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support
107# the executable mode bit, but doesn't really do so.
108#
109# Define NO_IPV6 if you lack IPv6 support and getaddrinfo().
110#
111# Define NO_SOCKADDR_STORAGE if your platform does not have struct
112# sockaddr_storage.
113#
114# Define NO_ICONV if your libc does not properly support iconv.
115#
116# Define OLD_ICONV if your library has an old iconv(), where the second
117# (input buffer pointer) parameter is declared with type (const char **).
118#
119# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound.
120#
121# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib"
122# that tells runtime paths to dynamic libraries;
123# "-Wl,-rpath=/path/lib" is used instead.
124#
125# Define USE_NSEC below if you want perf to care about sub-second file mtimes
126# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
127# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
128# randomly break unless your underlying filesystem supports those sub-second
129# times (my ext3 doesn't).
130#
131# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of
132# "st_ctim"
133#
134# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
135# available. This automatically turns USE_NSEC off.
136#
137# Define USE_STDEV below if you want perf to care about the underlying device
138# change being considered an inode change from the update-index perspective.
139#
140# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
141# field that counts the on-disk footprint in 512-byte blocks.
142# 15#
143# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 16# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
144# 17#
145# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. 18# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
146# 19#
147# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's
148# MakeMaker (e.g. using ActiveState under Cygwin).
149#
150# Define NO_PERL if you do not want Perl scripts or libraries at all.
151#
152# Define INTERNAL_QSORT to use Git's implementation of qsort(), which
153# is a simplified version of the merge sort used in glibc. This is
154# recommended if Git triggers O(n^2) behavior in your platform's qsort().
155#
156# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
157# your external grep (e.g., if your system lacks grep, if its grep is
158# broken, or spawning external process is slower than built-in grep perf has).
159#
160# Define LDFLAGS=-static to build a static binary. 20# Define LDFLAGS=-static to build a static binary.
161# 21#
162# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. 22# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
@@ -167,12 +27,7 @@ $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
167 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) 27 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
168-include $(OUTPUT)PERF-VERSION-FILE 28-include $(OUTPUT)PERF-VERSION-FILE
169 29
170uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') 30uname_M := $(shell uname -m 2>/dev/null || echo not)
171uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
172uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
173uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
174uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
175uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
176 31
177ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ 32ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
178 -e s/arm.*/arm/ -e s/sa110/arm/ \ 33 -e s/arm.*/arm/ -e s/sa110/arm/ \
@@ -191,8 +46,6 @@ ifeq ($(ARCH),x86_64)
191 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S 46 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
192endif 47endif
193 48
194# CFLAGS and LDFLAGS are for the users to override from the command line.
195
196# 49#
197# Include saner warnings here, which can catch bugs: 50# Include saner warnings here, which can catch bugs:
198# 51#
@@ -270,22 +123,13 @@ CC = $(CROSS_COMPILE)gcc
270AR = $(CROSS_COMPILE)ar 123AR = $(CROSS_COMPILE)ar
271RM = rm -f 124RM = rm -f
272MKDIR = mkdir 125MKDIR = mkdir
273TAR = tar
274FIND = find 126FIND = find
275INSTALL = install 127INSTALL = install
276RPMBUILD = rpmbuild
277PTHREAD_LIBS = -lpthread
278 128
279# sparse is architecture-neutral, which means that we need to tell it 129# sparse is architecture-neutral, which means that we need to tell it
280# explicitly what architecture to check for. Fix this up for yours.. 130# explicitly what architecture to check for. Fix this up for yours..
281SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ 131SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
282 132
283ifeq ($(V), 2)
284 QUIET_STDERR = ">/dev/null"
285else
286 QUIET_STDERR = ">/dev/null 2>&1"
287endif
288
289-include feature-tests.mak 133-include feature-tests.mak
290 134
291ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y) 135ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
@@ -310,49 +154,37 @@ BASIC_LDFLAGS =
310 154
311# Guard against environment variables 155# Guard against environment variables
312BUILTIN_OBJS = 156BUILTIN_OBJS =
313BUILT_INS =
314COMPAT_CFLAGS =
315COMPAT_OBJS =
316LIB_H = 157LIB_H =
317LIB_OBJS = 158LIB_OBJS =
318SCRIPT_PERL = 159PYRF_OBJS =
319SCRIPT_SH = 160SCRIPT_SH =
320TEST_PROGRAMS =
321 161
322SCRIPT_SH += perf-archive.sh 162SCRIPT_SH += perf-archive.sh
323 163
324grep-libs = $(filter -l%,$(1)) 164grep-libs = $(filter -l%,$(1))
325strip-libs = $(filter-out -l%,$(1)) 165strip-libs = $(filter-out -l%,$(1))
326 166
167$(OUTPUT)python/perf.so: $(PYRF_OBJS)
168 $(QUIET_GEN)python util/setup.py --quiet build_ext --build-lib='$(OUTPUT)python' \
169 --build-temp='$(OUTPUT)python/temp'
327# 170#
328# No Perl scripts right now: 171# No Perl scripts right now:
329# 172#
330 173
331# SCRIPT_PERL += perf-add--interactive.perl 174SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
332
333SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
334 $(patsubst %.perl,%,$(SCRIPT_PERL))
335
336# Empty...
337EXTRA_PROGRAMS =
338
339# ... and all the rest that could be moved out of bindir to perfexecdir
340PROGRAMS += $(EXTRA_PROGRAMS)
341 175
342# 176#
343# Single 'perf' binary right now: 177# Single 'perf' binary right now:
344# 178#
345PROGRAMS += $(OUTPUT)perf 179PROGRAMS += $(OUTPUT)perf
346 180
347# List built-in command $C whose implementation cmd_$C() is not in 181LANG_BINDINGS =
348# builtin-$C.o but is linked in as part of some other command.
349#
350 182
351# what 'all' will build and 'install' will install, in perfexecdir 183# what 'all' will build and 'install' will install, in perfexecdir
352ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) 184ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
353 185
354# what 'all' will build but not install in perfexecdir 186# what 'all' will build but not install in perfexecdir
355OTHER_PROGRAMS = $(OUTPUT)perf$X 187OTHER_PROGRAMS = $(OUTPUT)perf
356 188
357# Set paths to tools early so that they can be used for version tests. 189# Set paths to tools early so that they can be used for version tests.
358ifndef SHELL_PATH 190ifndef SHELL_PATH
@@ -395,6 +227,7 @@ LIB_H += util/include/dwarf-regs.h
395LIB_H += util/include/asm/dwarf2.h 227LIB_H += util/include/asm/dwarf2.h
396LIB_H += util/include/asm/cpufeature.h 228LIB_H += util/include/asm/cpufeature.h
397LIB_H += perf.h 229LIB_H += perf.h
230LIB_H += util/annotate.h
398LIB_H += util/cache.h 231LIB_H += util/cache.h
399LIB_H += util/callchain.h 232LIB_H += util/callchain.h
400LIB_H += util/build-id.h 233LIB_H += util/build-id.h
@@ -402,6 +235,7 @@ LIB_H += util/debug.h
402LIB_H += util/debugfs.h 235LIB_H += util/debugfs.h
403LIB_H += util/event.h 236LIB_H += util/event.h
404LIB_H += util/evsel.h 237LIB_H += util/evsel.h
238LIB_H += util/evlist.h
405LIB_H += util/exec_cmd.h 239LIB_H += util/exec_cmd.h
406LIB_H += util/types.h 240LIB_H += util/types.h
407LIB_H += util/levenshtein.h 241LIB_H += util/levenshtein.h
@@ -416,6 +250,7 @@ LIB_H += util/help.h
416LIB_H += util/session.h 250LIB_H += util/session.h
417LIB_H += util/strbuf.h 251LIB_H += util/strbuf.h
418LIB_H += util/strlist.h 252LIB_H += util/strlist.h
253LIB_H += util/strfilter.h
419LIB_H += util/svghelper.h 254LIB_H += util/svghelper.h
420LIB_H += util/run-command.h 255LIB_H += util/run-command.h
421LIB_H += util/sigchain.h 256LIB_H += util/sigchain.h
@@ -425,21 +260,26 @@ LIB_H += util/values.h
425LIB_H += util/sort.h 260LIB_H += util/sort.h
426LIB_H += util/hist.h 261LIB_H += util/hist.h
427LIB_H += util/thread.h 262LIB_H += util/thread.h
263LIB_H += util/thread_map.h
428LIB_H += util/trace-event.h 264LIB_H += util/trace-event.h
429LIB_H += util/probe-finder.h 265LIB_H += util/probe-finder.h
430LIB_H += util/probe-event.h 266LIB_H += util/probe-event.h
431LIB_H += util/pstack.h 267LIB_H += util/pstack.h
432LIB_H += util/cpumap.h 268LIB_H += util/cpumap.h
269LIB_H += util/top.h
433LIB_H += $(ARCH_INCLUDE) 270LIB_H += $(ARCH_INCLUDE)
271LIB_H += util/cgroup.h
434 272
435LIB_OBJS += $(OUTPUT)util/abspath.o 273LIB_OBJS += $(OUTPUT)util/abspath.o
436LIB_OBJS += $(OUTPUT)util/alias.o 274LIB_OBJS += $(OUTPUT)util/alias.o
275LIB_OBJS += $(OUTPUT)util/annotate.o
437LIB_OBJS += $(OUTPUT)util/build-id.o 276LIB_OBJS += $(OUTPUT)util/build-id.o
438LIB_OBJS += $(OUTPUT)util/config.o 277LIB_OBJS += $(OUTPUT)util/config.o
439LIB_OBJS += $(OUTPUT)util/ctype.o 278LIB_OBJS += $(OUTPUT)util/ctype.o
440LIB_OBJS += $(OUTPUT)util/debugfs.o 279LIB_OBJS += $(OUTPUT)util/debugfs.o
441LIB_OBJS += $(OUTPUT)util/environment.o 280LIB_OBJS += $(OUTPUT)util/environment.o
442LIB_OBJS += $(OUTPUT)util/event.o 281LIB_OBJS += $(OUTPUT)util/event.o
282LIB_OBJS += $(OUTPUT)util/evlist.o
443LIB_OBJS += $(OUTPUT)util/evsel.o 283LIB_OBJS += $(OUTPUT)util/evsel.o
444LIB_OBJS += $(OUTPUT)util/exec_cmd.o 284LIB_OBJS += $(OUTPUT)util/exec_cmd.o
445LIB_OBJS += $(OUTPUT)util/help.o 285LIB_OBJS += $(OUTPUT)util/help.o
@@ -455,6 +295,8 @@ LIB_OBJS += $(OUTPUT)util/quote.o
455LIB_OBJS += $(OUTPUT)util/strbuf.o 295LIB_OBJS += $(OUTPUT)util/strbuf.o
456LIB_OBJS += $(OUTPUT)util/string.o 296LIB_OBJS += $(OUTPUT)util/string.o
457LIB_OBJS += $(OUTPUT)util/strlist.o 297LIB_OBJS += $(OUTPUT)util/strlist.o
298LIB_OBJS += $(OUTPUT)util/strfilter.o
299LIB_OBJS += $(OUTPUT)util/top.o
458LIB_OBJS += $(OUTPUT)util/usage.o 300LIB_OBJS += $(OUTPUT)util/usage.o
459LIB_OBJS += $(OUTPUT)util/wrapper.o 301LIB_OBJS += $(OUTPUT)util/wrapper.o
460LIB_OBJS += $(OUTPUT)util/sigchain.o 302LIB_OBJS += $(OUTPUT)util/sigchain.o
@@ -469,6 +311,7 @@ LIB_OBJS += $(OUTPUT)util/map.o
469LIB_OBJS += $(OUTPUT)util/pstack.o 311LIB_OBJS += $(OUTPUT)util/pstack.o
470LIB_OBJS += $(OUTPUT)util/session.o 312LIB_OBJS += $(OUTPUT)util/session.o
471LIB_OBJS += $(OUTPUT)util/thread.o 313LIB_OBJS += $(OUTPUT)util/thread.o
314LIB_OBJS += $(OUTPUT)util/thread_map.o
472LIB_OBJS += $(OUTPUT)util/trace-event-parse.o 315LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
473LIB_OBJS += $(OUTPUT)util/trace-event-read.o 316LIB_OBJS += $(OUTPUT)util/trace-event-read.o
474LIB_OBJS += $(OUTPUT)util/trace-event-info.o 317LIB_OBJS += $(OUTPUT)util/trace-event-info.o
@@ -480,6 +323,7 @@ LIB_OBJS += $(OUTPUT)util/probe-event.o
480LIB_OBJS += $(OUTPUT)util/util.o 323LIB_OBJS += $(OUTPUT)util/util.o
481LIB_OBJS += $(OUTPUT)util/xyarray.o 324LIB_OBJS += $(OUTPUT)util/xyarray.o
482LIB_OBJS += $(OUTPUT)util/cpumap.o 325LIB_OBJS += $(OUTPUT)util/cpumap.o
326LIB_OBJS += $(OUTPUT)util/cgroup.o
483 327
484BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o 328BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
485 329
@@ -514,6 +358,20 @@ BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
514 358
515PERFLIBS = $(LIB_FILE) 359PERFLIBS = $(LIB_FILE)
516 360
361# Files needed for the python binding, perf.so
362# pyrf is just an internal name needed for all those wrappers.
363# This has to be in sync with what is in the 'sources' variable in
364# tools/perf/util/setup.py
365
366PYRF_OBJS += $(OUTPUT)util/cpumap.o
367PYRF_OBJS += $(OUTPUT)util/ctype.o
368PYRF_OBJS += $(OUTPUT)util/evlist.o
369PYRF_OBJS += $(OUTPUT)util/evsel.o
370PYRF_OBJS += $(OUTPUT)util/python.o
371PYRF_OBJS += $(OUTPUT)util/thread_map.o
372PYRF_OBJS += $(OUTPUT)util/util.o
373PYRF_OBJS += $(OUTPUT)util/xyarray.o
374
517# 375#
518# Platform specific tweaks 376# Platform specific tweaks
519# 377#
@@ -535,22 +393,6 @@ endif # NO_DWARF
535 393
536-include arch/$(ARCH)/Makefile 394-include arch/$(ARCH)/Makefile
537 395
538ifeq ($(uname_S),Darwin)
539 ifndef NO_FINK
540 ifeq ($(shell test -d /sw/lib && echo y),y)
541 BASIC_CFLAGS += -I/sw/include
542 BASIC_LDFLAGS += -L/sw/lib
543 endif
544 endif
545 ifndef NO_DARWIN_PORTS
546 ifeq ($(shell test -d /opt/local/lib && echo y),y)
547 BASIC_CFLAGS += -I/opt/local/include
548 BASIC_LDFLAGS += -L/opt/local/lib
549 endif
550 endif
551 PTHREAD_LIBS =
552endif
553
554ifneq ($(OUTPUT),) 396ifneq ($(OUTPUT),)
555 BASIC_CFLAGS += -I$(OUTPUT) 397 BASIC_CFLAGS += -I$(OUTPUT)
556endif 398endif
@@ -595,6 +437,7 @@ else
595 LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o 437 LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o
596 LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o 438 LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o
597 LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o 439 LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o
440 LIB_OBJS += $(OUTPUT)util/ui/browsers/top.o
598 LIB_OBJS += $(OUTPUT)util/ui/helpline.o 441 LIB_OBJS += $(OUTPUT)util/ui/helpline.o
599 LIB_OBJS += $(OUTPUT)util/ui/progress.o 442 LIB_OBJS += $(OUTPUT)util/ui/progress.o
600 LIB_OBJS += $(OUTPUT)util/ui/util.o 443 LIB_OBJS += $(OUTPUT)util/ui/util.o
@@ -604,6 +447,7 @@ else
604 LIB_H += util/ui/libslang.h 447 LIB_H += util/ui/libslang.h
605 LIB_H += util/ui/progress.h 448 LIB_H += util/ui/progress.h
606 LIB_H += util/ui/util.h 449 LIB_H += util/ui/util.h
450 LIB_H += util/ui/ui.h
607 endif 451 endif
608endif 452endif
609 453
@@ -635,12 +479,14 @@ else
635 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 479 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
636 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) 480 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
637 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) 481 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
482 msg := $(warning No Python.h found, install python-dev[el] to have python support in 'perf script' and to build the python bindings)
638 BASIC_CFLAGS += -DNO_LIBPYTHON 483 BASIC_CFLAGS += -DNO_LIBPYTHON
639 else 484 else
640 ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS) 485 ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
641 EXTLIBS += $(PYTHON_EMBED_LIBADD) 486 EXTLIBS += $(PYTHON_EMBED_LIBADD)
642 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o 487 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
643 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o 488 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
489 LANG_BINDINGS += $(OUTPUT)python/perf.so
644 endif 490 endif
645endif 491endif
646 492
@@ -690,201 +536,13 @@ else
690 endif 536 endif
691endif 537endif
692 538
693ifndef CC_LD_DYNPATH
694 ifdef NO_R_TO_GCC_LINKER
695 # Some gcc does not accept and pass -R to the linker to specify
696 # the runtime dynamic library path.
697 CC_LD_DYNPATH = -Wl,-rpath,
698 else
699 CC_LD_DYNPATH = -R
700 endif
701endif
702
703ifdef NEEDS_SOCKET
704 EXTLIBS += -lsocket
705endif
706ifdef NEEDS_NSL
707 EXTLIBS += -lnsl
708endif
709ifdef NO_D_TYPE_IN_DIRENT
710 BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT
711endif
712ifdef NO_D_INO_IN_DIRENT
713 BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT
714endif
715ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
716 BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT
717endif
718ifdef USE_NSEC
719 BASIC_CFLAGS += -DUSE_NSEC
720endif
721ifdef USE_ST_TIMESPEC
722 BASIC_CFLAGS += -DUSE_ST_TIMESPEC
723endif
724ifdef NO_NSEC
725 BASIC_CFLAGS += -DNO_NSEC
726endif
727ifdef NO_C99_FORMAT
728 BASIC_CFLAGS += -DNO_C99_FORMAT
729endif
730ifdef SNPRINTF_RETURNS_BOGUS
731 COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
732 COMPAT_OBJS += $(OUTPUT)compat/snprintf.o
733endif
734ifdef FREAD_READS_DIRECTORIES
735 COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
736 COMPAT_OBJS += $(OUTPUT)compat/fopen.o
737endif
738ifdef NO_SYMLINK_HEAD
739 BASIC_CFLAGS += -DNO_SYMLINK_HEAD
740endif
741ifdef NO_STRCASESTR
742 COMPAT_CFLAGS += -DNO_STRCASESTR
743 COMPAT_OBJS += $(OUTPUT)compat/strcasestr.o
744endif
745ifdef NO_STRTOUMAX
746 COMPAT_CFLAGS += -DNO_STRTOUMAX
747 COMPAT_OBJS += $(OUTPUT)compat/strtoumax.o
748endif
749ifdef NO_STRTOULL
750 COMPAT_CFLAGS += -DNO_STRTOULL
751endif
752ifdef NO_SETENV
753 COMPAT_CFLAGS += -DNO_SETENV
754 COMPAT_OBJS += $(OUTPUT)compat/setenv.o
755endif
756ifdef NO_MKDTEMP
757 COMPAT_CFLAGS += -DNO_MKDTEMP
758 COMPAT_OBJS += $(OUTPUT)compat/mkdtemp.o
759endif
760ifdef NO_UNSETENV
761 COMPAT_CFLAGS += -DNO_UNSETENV
762 COMPAT_OBJS += $(OUTPUT)compat/unsetenv.o
763endif
764ifdef NO_SYS_SELECT_H
765 BASIC_CFLAGS += -DNO_SYS_SELECT_H
766endif
767ifdef NO_MMAP
768 COMPAT_CFLAGS += -DNO_MMAP
769 COMPAT_OBJS += $(OUTPUT)compat/mmap.o
770else
771 ifdef USE_WIN32_MMAP
772 COMPAT_CFLAGS += -DUSE_WIN32_MMAP
773 COMPAT_OBJS += $(OUTPUT)compat/win32mmap.o
774 endif
775endif
776ifdef NO_PREAD
777 COMPAT_CFLAGS += -DNO_PREAD
778 COMPAT_OBJS += $(OUTPUT)compat/pread.o
779endif
780ifdef NO_FAST_WORKING_DIRECTORY
781 BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
782endif
783ifdef NO_TRUSTABLE_FILEMODE
784 BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE
785endif
786ifdef NO_IPV6
787 BASIC_CFLAGS += -DNO_IPV6
788endif
789ifdef NO_UINTMAX_T
790 BASIC_CFLAGS += -Duintmax_t=uint32_t
791endif
792ifdef NO_SOCKADDR_STORAGE
793ifdef NO_IPV6
794 BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
795else
796 BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
797endif
798endif
799ifdef NO_INET_NTOP
800 LIB_OBJS += $(OUTPUT)compat/inet_ntop.o
801endif
802ifdef NO_INET_PTON
803 LIB_OBJS += $(OUTPUT)compat/inet_pton.o
804endif
805
806ifdef NO_ICONV
807 BASIC_CFLAGS += -DNO_ICONV
808endif
809
810ifdef OLD_ICONV
811 BASIC_CFLAGS += -DOLD_ICONV
812endif
813
814ifdef NO_DEFLATE_BOUND
815 BASIC_CFLAGS += -DNO_DEFLATE_BOUND
816endif
817
818ifdef PPC_SHA1
819 SHA1_HEADER = "ppc/sha1.h"
820 LIB_OBJS += $(OUTPUT)ppc/sha1.o ppc/sha1ppc.o
821else
822ifdef ARM_SHA1
823 SHA1_HEADER = "arm/sha1.h"
824 LIB_OBJS += $(OUTPUT)arm/sha1.o $(OUTPUT)arm/sha1_arm.o
825else
826ifdef MOZILLA_SHA1
827 SHA1_HEADER = "mozilla-sha1/sha1.h"
828 LIB_OBJS += $(OUTPUT)mozilla-sha1/sha1.o
829else
830 SHA1_HEADER = <openssl/sha.h>
831 EXTLIBS += $(LIB_4_CRYPTO)
832endif
833endif
834endif
835ifdef NO_PERL_MAKEMAKER
836 export NO_PERL_MAKEMAKER
837endif
838ifdef NO_HSTRERROR
839 COMPAT_CFLAGS += -DNO_HSTRERROR
840 COMPAT_OBJS += $(OUTPUT)compat/hstrerror.o
841endif
842ifdef NO_MEMMEM
843 COMPAT_CFLAGS += -DNO_MEMMEM
844 COMPAT_OBJS += $(OUTPUT)compat/memmem.o
845endif
846ifdef INTERNAL_QSORT
847 COMPAT_CFLAGS += -DINTERNAL_QSORT
848 COMPAT_OBJS += $(OUTPUT)compat/qsort.o
849endif
850ifdef RUNTIME_PREFIX
851 COMPAT_CFLAGS += -DRUNTIME_PREFIX
852endif
853
854ifdef DIR_HAS_BSD_GROUP_SEMANTICS
855 COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
856endif
857ifdef NO_EXTERNAL_GREP
858 BASIC_CFLAGS += -DNO_EXTERNAL_GREP
859endif
860
861ifeq ($(PERL_PATH),)
862NO_PERL=NoThanks
863endif
864
865QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
866QUIET_SUBDIR1 =
867
868ifneq ($(findstring $(MAKEFLAGS),w),w)
869PRINT_DIR = --no-print-directory
870else # "make -w"
871NO_SUBDIR = :
872endif
873
874ifneq ($(findstring $(MAKEFLAGS),s),s) 539ifneq ($(findstring $(MAKEFLAGS),s),s)
875ifndef V 540ifndef V
876 QUIET_CC = @echo ' ' CC $@; 541 QUIET_CC = @echo ' ' CC $@;
877 QUIET_AR = @echo ' ' AR $@; 542 QUIET_AR = @echo ' ' AR $@;
878 QUIET_LINK = @echo ' ' LINK $@; 543 QUIET_LINK = @echo ' ' LINK $@;
879 QUIET_MKDIR = @echo ' ' MKDIR $@; 544 QUIET_MKDIR = @echo ' ' MKDIR $@;
880 QUIET_BUILT_IN = @echo ' ' BUILTIN $@;
881 QUIET_GEN = @echo ' ' GEN $@; 545 QUIET_GEN = @echo ' ' GEN $@;
882 QUIET_SUBDIR0 = +@subdir=
883 QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
884 $(MAKE) $(PRINT_DIR) -C $$subdir
885 export V
886 export QUIET_GEN
887 export QUIET_BUILT_IN
888endif 546endif
889endif 547endif
890 548
@@ -894,7 +552,6 @@ endif
894 552
895# Shell quote (do not use $(call) to accommodate ancient setups); 553# Shell quote (do not use $(call) to accommodate ancient setups);
896 554
897SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
898ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) 555ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
899 556
900DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) 557DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
@@ -908,46 +565,36 @@ htmldir_SQ = $(subst ','\'',$(htmldir))
908prefix_SQ = $(subst ','\'',$(prefix)) 565prefix_SQ = $(subst ','\'',$(prefix))
909 566
910SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) 567SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
911PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
912 568
913LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS) 569LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
914 570
915BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
916 $(COMPAT_CFLAGS)
917LIB_OBJS += $(COMPAT_OBJS)
918
919ALL_CFLAGS += $(BASIC_CFLAGS) 571ALL_CFLAGS += $(BASIC_CFLAGS)
920ALL_CFLAGS += $(ARCH_CFLAGS) 572ALL_CFLAGS += $(ARCH_CFLAGS)
921ALL_LDFLAGS += $(BASIC_LDFLAGS) 573ALL_LDFLAGS += $(BASIC_LDFLAGS)
922 574
923export TAR INSTALL DESTDIR SHELL_PATH 575export INSTALL SHELL_PATH
924 576
925 577
926### Build rules 578### Build rules
927 579
928SHELL = $(SHELL_PATH) 580SHELL = $(SHELL_PATH)
929 581
930all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS 582all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
931ifneq (,$X)
932 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
933endif
934
935all::
936 583
937please_set_SHELL_PATH_to_a_more_modern_shell: 584please_set_SHELL_PATH_to_a_more_modern_shell:
938 @$$(:) 585 @$$(:)
939 586
940shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell 587shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
941 588
942strip: $(PROGRAMS) $(OUTPUT)perf$X 589strip: $(PROGRAMS) $(OUTPUT)perf
943 $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf$X 590 $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
944 591
945$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS 592$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
946 $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ 593 $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
947 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 594 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
948 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ 595 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
949 596
950$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) 597$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
951 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \ 598 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \
952 $(BUILTIN_OBJS) $(LIBS) -o $@ 599 $(BUILTIN_OBJS) $(LIBS) -o $@
953 600
@@ -963,39 +610,17 @@ $(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPU
963 '-DPERF_MAN_PATH="$(mandir_SQ)"' \ 610 '-DPERF_MAN_PATH="$(mandir_SQ)"' \
964 '-DPERF_INFO_PATH="$(infodir_SQ)"' $< 611 '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
965 612
966$(BUILT_INS): $(OUTPUT)perf$X
967 $(QUIET_BUILT_IN)$(RM) $@ && \
968 ln perf$X $@ 2>/dev/null || \
969 ln -s perf$X $@ 2>/dev/null || \
970 cp perf$X $@
971
972$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt 613$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
973 614
974$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) 615$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
975 $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ 616 $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
976 617
977$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh 618$(SCRIPTS) : % : %.sh
978 $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \ 619 $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
979 sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
980 -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
981 -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
982 -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
983 -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
984 $@.sh > $(OUTPUT)$@+ && \
985 chmod +x $(OUTPUT)$@+ && \
986 mv $(OUTPUT)$@+ $(OUTPUT)$@
987
988configure: configure.ac
989 $(QUIET_GEN)$(RM) $@ $<+ && \
990 sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
991 $< > $<+ && \
992 autoconf -o $@ $<+ && \
993 $(RM) $<+
994 620
995# These can record PERF_VERSION 621# These can record PERF_VERSION
996$(OUTPUT)perf.o perf.spec \ 622$(OUTPUT)perf.o perf.spec \
997 $(patsubst %.sh,%,$(SCRIPT_SH)) \ 623 $(SCRIPTS) \
998 $(patsubst %.perl,%,$(SCRIPT_PERL)) \
999 : $(OUTPUT)PERF-VERSION-FILE 624 : $(OUTPUT)PERF-VERSION-FILE
1000 625
1001$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS 626$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
@@ -1012,9 +637,6 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
1012 '-DPREFIX="$(prefix_SQ)"' \ 637 '-DPREFIX="$(prefix_SQ)"' \
1013 $< 638 $<
1014 639
1015$(OUTPUT)builtin-init-db.o: builtin-init-db.c $(OUTPUT)PERF-CFLAGS
1016 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
1017
1018$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS 640$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
1019 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 641 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
1020 642
@@ -1024,6 +646,9 @@ $(OUTPUT)util/ui/browser.o: util/ui/browser.c $(OUTPUT)PERF-CFLAGS
1024$(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS 646$(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
1025 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 647 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
1026 648
649$(OUTPUT)util/ui/browsers/top.o: util/ui/browsers/top.c $(OUTPUT)PERF-CFLAGS
650 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
651
1027$(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS 652$(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
1028 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< 653 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
1029 654
@@ -1045,12 +670,11 @@ $(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/tra
1045$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS 670$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
1046 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< 671 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
1047 672
1048$(OUTPUT)perf-%$X: %.o $(PERFLIBS) 673$(OUTPUT)perf-%: %.o $(PERFLIBS)
1049 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) 674 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
1050 675
1051$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) 676$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
1052$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) 677$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
1053builtin-revert.o wt-status.o: wt-status.h
1054 678
1055# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So 679# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
1056# we depend the various files onto their directories. 680# we depend the various files onto their directories.
@@ -1063,6 +687,36 @@ $(sort $(dir $(DIRECTORY_DEPS))):
1063$(LIB_FILE): $(LIB_OBJS) 687$(LIB_FILE): $(LIB_OBJS)
1064 $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) 688 $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
1065 689
690help:
691 @echo 'Perf make targets:'
692 @echo ' doc - make *all* documentation (see below)'
693 @echo ' man - make manpage documentation (access with man <foo>)'
694 @echo ' html - make html documentation'
695 @echo ' info - make GNU info documentation (access with info <foo>)'
696 @echo ' pdf - make pdf documentation'
697 @echo ' TAGS - use etags to make tag information for source browsing'
698 @echo ' tags - use ctags to make tag information for source browsing'
699 @echo ' cscope - use cscope to make interactive browsing database'
700 @echo ''
701 @echo 'Perf install targets:'
702 @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
703 @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular'
704 @echo ' path like make prefix=/usr/local install install-doc'
705 @echo ' install - install compiled binaries'
706 @echo ' install-doc - install *all* documentation'
707 @echo ' install-man - install manpage documentation'
708 @echo ' install-html - install html documentation'
709 @echo ' install-info - install GNU info documentation'
710 @echo ' install-pdf - install pdf documentation'
711 @echo ''
712 @echo ' quick-install-doc - alias for quick-install-man'
713 @echo ' quick-install-man - install the documentation quickly'
714 @echo ' quick-install-html - install the html documentation quickly'
715 @echo ''
716 @echo 'Perf maintainer targets:'
717 @echo ' distclean - alias to clean'
718 @echo ' clean - clean all binary objects and build output'
719
1066doc: 720doc:
1067 $(MAKE) -C Documentation all 721 $(MAKE) -C Documentation all
1068 722
@@ -1101,30 +755,12 @@ $(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
1101 echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ 755 echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
1102 fi 756 fi
1103 757
1104# We need to apply sq twice, once to protect from the shell
1105# that runs $(OUTPUT)PERF-BUILD-OPTIONS, and then again to protect it
1106# and the first level quoting from the shell that runs "echo".
1107$(OUTPUT)PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
1108 @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
1109 @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
1110 @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
1111 @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
1112
1113### Testing rules 758### Testing rules
1114 759
1115#
1116# None right now:
1117#
1118# TEST_PROGRAMS += test-something$X
1119
1120all:: $(TEST_PROGRAMS)
1121
1122# GNU make supports exporting all variables by "export" without parameters. 760# GNU make supports exporting all variables by "export" without parameters.
1123# However, the environment gets quite big, and some programs have problems 761# However, the environment gets quite big, and some programs have problems
1124# with that. 762# with that.
1125 763
1126export NO_SVN_TESTS
1127
1128check: $(OUTPUT)common-cmds.h 764check: $(OUTPUT)common-cmds.h
1129 if sparse; \ 765 if sparse; \
1130 then \ 766 then \
@@ -1133,33 +769,21 @@ check: $(OUTPUT)common-cmds.h
1133 sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ 769 sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
1134 done; \ 770 done; \
1135 else \ 771 else \
1136 echo 2>&1 "Did you mean 'make test'?"; \
1137 exit 1; \ 772 exit 1; \
1138 fi 773 fi
1139 774
1140remove-dashes:
1141 ./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
1142
1143### Installation rules 775### Installation rules
1144 776
1145ifneq ($(filter /%,$(firstword $(template_dir))),)
1146template_instdir = $(template_dir)
1147else
1148template_instdir = $(prefix)/$(template_dir)
1149endif
1150export template_instdir
1151
1152ifneq ($(filter /%,$(firstword $(perfexecdir))),) 777ifneq ($(filter /%,$(firstword $(perfexecdir))),)
1153perfexec_instdir = $(perfexecdir) 778perfexec_instdir = $(perfexecdir)
1154else 779else
1155perfexec_instdir = $(prefix)/$(perfexecdir) 780perfexec_instdir = $(prefix)/$(perfexecdir)
1156endif 781endif
1157perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) 782perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
1158export perfexec_instdir
1159 783
1160install: all 784install: all
1161 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' 785 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
1162 $(INSTALL) $(OUTPUT)perf$X '$(DESTDIR_SQ)$(bindir_SQ)' 786 $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
1163 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' 787 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
1164 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' 788 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
1165 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 789 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -1172,14 +796,6 @@ install: all
1172 $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' 796 $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
1173 $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' 797 $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
1174 798
1175ifdef BUILT_INS
1176 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1177 $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1178ifneq (,$X)
1179 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) $(OUTPUT)perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
1180endif
1181endif
1182
1183install-doc: 799install-doc:
1184 $(MAKE) -C Documentation install 800 $(MAKE) -C Documentation install
1185 801
@@ -1204,104 +820,17 @@ quick-install-man:
1204quick-install-html: 820quick-install-html:
1205 $(MAKE) -C Documentation quick-install-html 821 $(MAKE) -C Documentation quick-install-html
1206 822
1207
1208### Maintainer's dist rules
1209#
1210# None right now
1211#
1212#
1213# perf.spec: perf.spec.in
1214# sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
1215# mv $@+ $@
1216#
1217# PERF_TARNAME=perf-$(PERF_VERSION)
1218# dist: perf.spec perf-archive$(X) configure
1219# ./perf-archive --format=tar \
1220# --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
1221# @mkdir -p $(PERF_TARNAME)
1222# @cp perf.spec configure $(PERF_TARNAME)
1223# @echo $(PERF_VERSION) > $(PERF_TARNAME)/version
1224# $(TAR) rf $(PERF_TARNAME).tar \
1225# $(PERF_TARNAME)/perf.spec \
1226# $(PERF_TARNAME)/configure \
1227# $(PERF_TARNAME)/version
1228# @$(RM) -r $(PERF_TARNAME)
1229# gzip -f -9 $(PERF_TARNAME).tar
1230#
1231# htmldocs = perf-htmldocs-$(PERF_VERSION)
1232# manpages = perf-manpages-$(PERF_VERSION)
1233# dist-doc:
1234# $(RM) -r .doc-tmp-dir
1235# mkdir .doc-tmp-dir
1236# $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
1237# cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
1238# gzip -n -9 -f $(htmldocs).tar
1239# :
1240# $(RM) -r .doc-tmp-dir
1241# mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
1242# $(MAKE) -C Documentation DESTDIR=./ \
1243# man1dir=../.doc-tmp-dir/man1 \
1244# man5dir=../.doc-tmp-dir/man5 \
1245# man7dir=../.doc-tmp-dir/man7 \
1246# install
1247# cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
1248# gzip -n -9 -f $(manpages).tar
1249# $(RM) -r .doc-tmp-dir
1250#
1251# rpm: dist
1252# $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
1253
1254### Cleaning rules 823### Cleaning rules
1255 824
1256distclean: clean
1257# $(RM) configure
1258
1259clean: 825clean:
1260 $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE) 826 $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive}
1261 $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X 827 $(RM) $(ALL_PROGRAMS) perf
1262 $(RM) $(TEST_PROGRAMS)
1263 $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* 828 $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
1264 $(RM) -r autom4te.cache
1265 $(RM) config.log config.mak.autogen config.mak.append config.status config.cache
1266 $(RM) -r $(PERF_TARNAME) .doc-tmp-dir
1267 $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
1268 $(RM) $(htmldocs).tar.gz $(manpages).tar.gz
1269 $(MAKE) -C Documentation/ clean 829 $(MAKE) -C Documentation/ clean
1270 $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-BUILD-OPTIONS 830 $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS
831 @python util/setup.py clean --build-lib='$(OUTPUT)python' \
832 --build-temp='$(OUTPUT)python/temp'
1271 833
1272.PHONY: all install clean strip 834.PHONY: all install clean strip
1273.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell 835.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
1274.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS 836.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
1275.PHONY: .FORCE-PERF-BUILD-OPTIONS
1276
1277### Make sure built-ins do not have dups and listed in perf.c
1278#
1279check-builtins::
1280 ./check-builtins.sh
1281
1282### Test suite coverage testing
1283#
1284# None right now
1285#
1286# .PHONY: coverage coverage-clean coverage-build coverage-report
1287#
1288# coverage:
1289# $(MAKE) coverage-build
1290# $(MAKE) coverage-report
1291#
1292# coverage-clean:
1293# rm -f *.gcda *.gcno
1294#
1295# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
1296# COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov
1297#
1298# coverage-build: coverage-clean
1299# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
1300# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
1301# -j1 test
1302#
1303# coverage-report:
1304# gcov -b *.c */*.c
1305# grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
1306# | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
1307# | tee coverage-untested-functions
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index d9ab3ce446ac..0c7454f8b8a9 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -55,7 +55,7 @@ int bench_sched_pipe(int argc, const char **argv,
55 * discarding returned value of read(), write() 55 * discarding returned value of read(), write()
56 * causes error in building environment for perf 56 * causes error in building environment for perf
57 */ 57 */
58 int ret, wait_stat; 58 int __used ret, wait_stat;
59 pid_t pid, retpid; 59 pid_t pid, retpid;
60 60
61 argc = parse_options(argc, argv, options, 61 argc = parse_options(argc, argv, options,
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 8879463807e4..695de4b5ae63 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -9,6 +9,7 @@
9 9
10#include "util/util.h" 10#include "util/util.h"
11 11
12#include "util/util.h"
12#include "util/color.h" 13#include "util/color.h"
13#include <linux/list.h> 14#include <linux/list.h>
14#include "util/cache.h" 15#include "util/cache.h"
@@ -18,6 +19,9 @@
18#include "perf.h" 19#include "perf.h"
19#include "util/debug.h" 20#include "util/debug.h"
20 21
22#include "util/evlist.h"
23#include "util/evsel.h"
24#include "util/annotate.h"
21#include "util/event.h" 25#include "util/event.h"
22#include "util/parse-options.h" 26#include "util/parse-options.h"
23#include "util/parse-events.h" 27#include "util/parse-events.h"
@@ -36,9 +40,13 @@ static bool print_line;
36 40
37static const char *sym_hist_filter; 41static const char *sym_hist_filter;
38 42
39static int hists__add_entry(struct hists *self, struct addr_location *al) 43static int perf_evlist__add_sample(struct perf_evlist *evlist,
44 struct perf_sample *sample,
45 struct addr_location *al)
40{ 46{
47 struct perf_evsel *evsel;
41 struct hist_entry *he; 48 struct hist_entry *he;
49 int ret;
42 50
43 if (sym_hist_filter != NULL && 51 if (sym_hist_filter != NULL &&
44 (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) { 52 (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
@@ -51,25 +59,51 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
51 return 0; 59 return 0;
52 } 60 }
53 61
54 he = __hists__add_entry(self, al, NULL, 1); 62 evsel = perf_evlist__id2evsel(evlist, sample->id);
63 if (evsel == NULL) {
64 /*
65 * FIXME: Propagate this back, but at least we're in a builtin,
66 * where exit() is allowed. ;-)
67 */
68 ui__warning("Invalid %s file, contains samples with id not in "
69 "its header!\n", input_name);
70 exit_browser(0);
71 exit(1);
72 }
73
74 he = __hists__add_entry(&evsel->hists, al, NULL, 1);
55 if (he == NULL) 75 if (he == NULL)
56 return -ENOMEM; 76 return -ENOMEM;
57 77
58 return hist_entry__inc_addr_samples(he, al->addr); 78 ret = 0;
79 if (he->ms.sym != NULL) {
80 struct annotation *notes = symbol__annotation(he->ms.sym);
81 if (notes->src == NULL &&
82 symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0)
83 return -ENOMEM;
84
85 ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
86 }
87
88 evsel->hists.stats.total_period += sample->period;
89 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
90 return ret;
59} 91}
60 92
61static int process_sample_event(event_t *event, struct sample_data *sample, 93static int process_sample_event(union perf_event *event,
94 struct perf_sample *sample,
62 struct perf_session *session) 95 struct perf_session *session)
63{ 96{
64 struct addr_location al; 97 struct addr_location al;
65 98
66 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 99 if (perf_event__preprocess_sample(event, session, &al, sample,
100 symbol__annotate_init) < 0) {
67 pr_warning("problem processing %d event, skipping it.\n", 101 pr_warning("problem processing %d event, skipping it.\n",
68 event->header.type); 102 event->header.type);
69 return -1; 103 return -1;
70 } 104 }
71 105
72 if (!al.filtered && hists__add_entry(&session->hists, &al)) { 106 if (!al.filtered && perf_evlist__add_sample(session->evlist, sample, &al)) {
73 pr_warning("problem incrementing symbol count, " 107 pr_warning("problem incrementing symbol count, "
74 "skipping event\n"); 108 "skipping event\n");
75 return -1; 109 return -1;
@@ -78,261 +112,26 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
78 return 0; 112 return 0;
79} 113}
80 114
81static int objdump_line__print(struct objdump_line *self, 115static int hist_entry__tty_annotate(struct hist_entry *he, int evidx)
82 struct list_head *head,
83 struct hist_entry *he, u64 len)
84{
85 struct symbol *sym = he->ms.sym;
86 static const char *prev_line;
87 static const char *prev_color;
88
89 if (self->offset != -1) {
90 const char *path = NULL;
91 unsigned int hits = 0;
92 double percent = 0.0;
93 const char *color;
94 struct sym_priv *priv = symbol__priv(sym);
95 struct sym_ext *sym_ext = priv->ext;
96 struct sym_hist *h = priv->hist;
97 s64 offset = self->offset;
98 struct objdump_line *next = objdump__get_next_ip_line(head, self);
99
100 while (offset < (s64)len &&
101 (next == NULL || offset < next->offset)) {
102 if (sym_ext) {
103 if (path == NULL)
104 path = sym_ext[offset].path;
105 percent += sym_ext[offset].percent;
106 } else
107 hits += h->ip[offset];
108
109 ++offset;
110 }
111
112 if (sym_ext == NULL && h->sum)
113 percent = 100.0 * hits / h->sum;
114
115 color = get_percent_color(percent);
116
117 /*
118 * Also color the filename and line if needed, with
119 * the same color than the percentage. Don't print it
120 * twice for close colored ip with the same filename:line
121 */
122 if (path) {
123 if (!prev_line || strcmp(prev_line, path)
124 || color != prev_color) {
125 color_fprintf(stdout, color, " %s", path);
126 prev_line = path;
127 prev_color = color;
128 }
129 }
130
131 color_fprintf(stdout, color, " %7.2f", percent);
132 printf(" : ");
133 color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line);
134 } else {
135 if (!*self->line)
136 printf(" :\n");
137 else
138 printf(" : %s\n", self->line);
139 }
140
141 return 0;
142}
143
144static struct rb_root root_sym_ext;
145
146static void insert_source_line(struct sym_ext *sym_ext)
147{
148 struct sym_ext *iter;
149 struct rb_node **p = &root_sym_ext.rb_node;
150 struct rb_node *parent = NULL;
151
152 while (*p != NULL) {
153 parent = *p;
154 iter = rb_entry(parent, struct sym_ext, node);
155
156 if (sym_ext->percent > iter->percent)
157 p = &(*p)->rb_left;
158 else
159 p = &(*p)->rb_right;
160 }
161
162 rb_link_node(&sym_ext->node, parent, p);
163 rb_insert_color(&sym_ext->node, &root_sym_ext);
164}
165
166static void free_source_line(struct hist_entry *he, int len)
167{
168 struct sym_priv *priv = symbol__priv(he->ms.sym);
169 struct sym_ext *sym_ext = priv->ext;
170 int i;
171
172 if (!sym_ext)
173 return;
174
175 for (i = 0; i < len; i++)
176 free(sym_ext[i].path);
177 free(sym_ext);
178
179 priv->ext = NULL;
180 root_sym_ext = RB_ROOT;
181}
182
183/* Get the filename:line for the colored entries */
184static void
185get_source_line(struct hist_entry *he, int len, const char *filename)
186{
187 struct symbol *sym = he->ms.sym;
188 u64 start;
189 int i;
190 char cmd[PATH_MAX * 2];
191 struct sym_ext *sym_ext;
192 struct sym_priv *priv = symbol__priv(sym);
193 struct sym_hist *h = priv->hist;
194
195 if (!h->sum)
196 return;
197
198 sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
199 if (!priv->ext)
200 return;
201
202 start = he->ms.map->unmap_ip(he->ms.map, sym->start);
203
204 for (i = 0; i < len; i++) {
205 char *path = NULL;
206 size_t line_len;
207 u64 offset;
208 FILE *fp;
209
210 sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
211 if (sym_ext[i].percent <= 0.5)
212 continue;
213
214 offset = start + i;
215 sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
216 fp = popen(cmd, "r");
217 if (!fp)
218 continue;
219
220 if (getline(&path, &line_len, fp) < 0 || !line_len)
221 goto next;
222
223 sym_ext[i].path = malloc(sizeof(char) * line_len + 1);
224 if (!sym_ext[i].path)
225 goto next;
226
227 strcpy(sym_ext[i].path, path);
228 insert_source_line(&sym_ext[i]);
229
230 next:
231 pclose(fp);
232 }
233}
234
235static void print_summary(const char *filename)
236{
237 struct sym_ext *sym_ext;
238 struct rb_node *node;
239
240 printf("\nSorted summary for file %s\n", filename);
241 printf("----------------------------------------------\n\n");
242
243 if (RB_EMPTY_ROOT(&root_sym_ext)) {
244 printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
245 return;
246 }
247
248 node = rb_first(&root_sym_ext);
249 while (node) {
250 double percent;
251 const char *color;
252 char *path;
253
254 sym_ext = rb_entry(node, struct sym_ext, node);
255 percent = sym_ext->percent;
256 color = get_percent_color(percent);
257 path = sym_ext->path;
258
259 color_fprintf(stdout, color, " %7.2f %s", percent, path);
260 node = rb_next(node);
261 }
262}
263
264static void hist_entry__print_hits(struct hist_entry *self)
265{
266 struct symbol *sym = self->ms.sym;
267 struct sym_priv *priv = symbol__priv(sym);
268 struct sym_hist *h = priv->hist;
269 u64 len = sym->end - sym->start, offset;
270
271 for (offset = 0; offset < len; ++offset)
272 if (h->ip[offset] != 0)
273 printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
274 sym->start + offset, h->ip[offset]);
275 printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
276}
277
278static int hist_entry__tty_annotate(struct hist_entry *he)
279{ 116{
280 struct map *map = he->ms.map; 117 return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx,
281 struct dso *dso = map->dso; 118 print_line, full_paths, 0, 0);
282 struct symbol *sym = he->ms.sym;
283 const char *filename = dso->long_name, *d_filename;
284 u64 len;
285 LIST_HEAD(head);
286 struct objdump_line *pos, *n;
287
288 if (hist_entry__annotate(he, &head, 0) < 0)
289 return -1;
290
291 if (full_paths)
292 d_filename = filename;
293 else
294 d_filename = basename(filename);
295
296 len = sym->end - sym->start;
297
298 if (print_line) {
299 get_source_line(he, len, filename);
300 print_summary(filename);
301 }
302
303 printf("\n\n------------------------------------------------\n");
304 printf(" Percent | Source code & Disassembly of %s\n", d_filename);
305 printf("------------------------------------------------\n");
306
307 if (verbose)
308 hist_entry__print_hits(he);
309
310 list_for_each_entry_safe(pos, n, &head, node) {
311 objdump_line__print(pos, &head, he, len);
312 list_del(&pos->node);
313 objdump_line__free(pos);
314 }
315
316 if (print_line)
317 free_source_line(he, len);
318
319 return 0;
320} 119}
321 120
322static void hists__find_annotations(struct hists *self) 121static void hists__find_annotations(struct hists *self, int evidx)
323{ 122{
324 struct rb_node *nd = rb_first(&self->entries), *next; 123 struct rb_node *nd = rb_first(&self->entries), *next;
325 int key = KEY_RIGHT; 124 int key = KEY_RIGHT;
326 125
327 while (nd) { 126 while (nd) {
328 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 127 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
329 struct sym_priv *priv; 128 struct annotation *notes;
330 129
331 if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned) 130 if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
332 goto find_next; 131 goto find_next;
333 132
334 priv = symbol__priv(he->ms.sym); 133 notes = symbol__annotation(he->ms.sym);
335 if (priv->hist == NULL) { 134 if (notes->src == NULL) {
336find_next: 135find_next:
337 if (key == KEY_LEFT) 136 if (key == KEY_LEFT)
338 nd = rb_prev(nd); 137 nd = rb_prev(nd);
@@ -342,7 +141,7 @@ find_next:
342 } 141 }
343 142
344 if (use_browser > 0) { 143 if (use_browser > 0) {
345 key = hist_entry__tui_annotate(he); 144 key = hist_entry__tui_annotate(he, evidx);
346 switch (key) { 145 switch (key) {
347 case KEY_RIGHT: 146 case KEY_RIGHT:
348 next = rb_next(nd); 147 next = rb_next(nd);
@@ -357,24 +156,24 @@ find_next:
357 if (next != NULL) 156 if (next != NULL)
358 nd = next; 157 nd = next;
359 } else { 158 } else {
360 hist_entry__tty_annotate(he); 159 hist_entry__tty_annotate(he, evidx);
361 nd = rb_next(nd); 160 nd = rb_next(nd);
362 /* 161 /*
363 * Since we have a hist_entry per IP for the same 162 * Since we have a hist_entry per IP for the same
364 * symbol, free he->ms.sym->hist to signal we already 163 * symbol, free he->ms.sym->src to signal we already
365 * processed this symbol. 164 * processed this symbol.
366 */ 165 */
367 free(priv->hist); 166 free(notes->src);
368 priv->hist = NULL; 167 notes->src = NULL;
369 } 168 }
370 } 169 }
371} 170}
372 171
373static struct perf_event_ops event_ops = { 172static struct perf_event_ops event_ops = {
374 .sample = process_sample_event, 173 .sample = process_sample_event,
375 .mmap = event__process_mmap, 174 .mmap = perf_event__process_mmap,
376 .comm = event__process_comm, 175 .comm = perf_event__process_comm,
377 .fork = event__process_task, 176 .fork = perf_event__process_task,
378 .ordered_samples = true, 177 .ordered_samples = true,
379 .ordering_requires_timestamps = true, 178 .ordering_requires_timestamps = true,
380}; 179};
@@ -383,6 +182,8 @@ static int __cmd_annotate(void)
383{ 182{
384 int ret; 183 int ret;
385 struct perf_session *session; 184 struct perf_session *session;
185 struct perf_evsel *pos;
186 u64 total_nr_samples;
386 187
387 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); 188 session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
388 if (session == NULL) 189 if (session == NULL)
@@ -403,12 +204,36 @@ static int __cmd_annotate(void)
403 if (verbose > 2) 204 if (verbose > 2)
404 perf_session__fprintf_dsos(session, stdout); 205 perf_session__fprintf_dsos(session, stdout);
405 206
406 hists__collapse_resort(&session->hists); 207 total_nr_samples = 0;
407 hists__output_resort(&session->hists); 208 list_for_each_entry(pos, &session->evlist->entries, node) {
408 hists__find_annotations(&session->hists); 209 struct hists *hists = &pos->hists;
409out_delete: 210 u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
410 perf_session__delete(session); 211
212 if (nr_samples > 0) {
213 total_nr_samples += nr_samples;
214 hists__collapse_resort(hists);
215 hists__output_resort(hists);
216 hists__find_annotations(hists, pos->idx);
217 }
218 }
411 219
220 if (total_nr_samples == 0) {
221 ui__warning("The %s file has no samples!\n", input_name);
222 goto out_delete;
223 }
224out_delete:
225 /*
226 * Speed up the exit process, for large files this can
227 * take quite a while.
228 *
229 * XXX Enable this when using valgrind or if we ever
230 * librarize this command.
231 *
232 * Also experiment with obstacks to see how much speed
233 * up we'll get here.
234 *
235 * perf_session__delete(session);
236 */
412 return ret; 237 return ret;
413} 238}
414 239
@@ -451,9 +276,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
451 else if (use_tui) 276 else if (use_tui)
452 use_browser = 1; 277 use_browser = 1;
453 278
454 setup_browser(); 279 setup_browser(true);
455 280
456 symbol_conf.priv_size = sizeof(struct sym_priv); 281 symbol_conf.priv_size = sizeof(struct annotation);
457 symbol_conf.try_vmlinux_path = true; 282 symbol_conf.try_vmlinux_path = true;
458 283
459 if (symbol__init() < 0) 284 if (symbol__init() < 0)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 3153e492dbcc..6b7d91160ecb 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,13 +30,13 @@ static int hists__add_entry(struct hists *self,
30 return -ENOMEM; 30 return -ENOMEM;
31} 31}
32 32
33static int diff__process_sample_event(event_t *event, 33static int diff__process_sample_event(union perf_event *event,
34 struct sample_data *sample, 34 struct perf_sample *sample,
35 struct perf_session *session) 35 struct perf_session *session)
36{ 36{
37 struct addr_location al; 37 struct addr_location al;
38 38
39 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 39 if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
40 pr_warning("problem processing %d event, skipping it.\n", 40 pr_warning("problem processing %d event, skipping it.\n",
41 event->header.type); 41 event->header.type);
42 return -1; 42 return -1;
@@ -56,11 +56,11 @@ static int diff__process_sample_event(event_t *event,
56 56
57static struct perf_event_ops event_ops = { 57static struct perf_event_ops event_ops = {
58 .sample = diff__process_sample_event, 58 .sample = diff__process_sample_event,
59 .mmap = event__process_mmap, 59 .mmap = perf_event__process_mmap,
60 .comm = event__process_comm, 60 .comm = perf_event__process_comm,
61 .exit = event__process_task, 61 .exit = perf_event__process_task,
62 .fork = event__process_task, 62 .fork = perf_event__process_task,
63 .lost = event__process_lost, 63 .lost = perf_event__process_lost,
64 .ordered_samples = true, 64 .ordered_samples = true,
65 .ordering_requires_timestamps = true, 65 .ordering_requires_timestamps = true,
66}; 66};
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0c78ffa7bf67..e29f04ed3396 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
16static char const *input_name = "-"; 16static char const *input_name = "-";
17static bool inject_build_ids; 17static bool inject_build_ids;
18 18
19static int event__repipe_synth(event_t *event, 19static int perf_event__repipe_synth(union perf_event *event,
20 struct perf_session *session __used) 20 struct perf_session *session __used)
21{ 21{
22 uint32_t size; 22 uint32_t size;
23 void *buf = event; 23 void *buf = event;
@@ -36,41 +36,44 @@ static int event__repipe_synth(event_t *event,
36 return 0; 36 return 0;
37} 37}
38 38
39static int event__repipe(event_t *event, struct sample_data *sample __used, 39static int perf_event__repipe(union perf_event *event,
40 struct perf_session *session) 40 struct perf_sample *sample __used,
41 struct perf_session *session)
41{ 42{
42 return event__repipe_synth(event, session); 43 return perf_event__repipe_synth(event, session);
43} 44}
44 45
45static int event__repipe_mmap(event_t *self, struct sample_data *sample, 46static int perf_event__repipe_mmap(union perf_event *event,
46 struct perf_session *session) 47 struct perf_sample *sample,
48 struct perf_session *session)
47{ 49{
48 int err; 50 int err;
49 51
50 err = event__process_mmap(self, sample, session); 52 err = perf_event__process_mmap(event, sample, session);
51 event__repipe(self, sample, session); 53 perf_event__repipe(event, sample, session);
52 54
53 return err; 55 return err;
54} 56}
55 57
56static int event__repipe_task(event_t *self, struct sample_data *sample, 58static int perf_event__repipe_task(union perf_event *event,
57 struct perf_session *session) 59 struct perf_sample *sample,
60 struct perf_session *session)
58{ 61{
59 int err; 62 int err;
60 63
61 err = event__process_task(self, sample, session); 64 err = perf_event__process_task(event, sample, session);
62 event__repipe(self, sample, session); 65 perf_event__repipe(event, sample, session);
63 66
64 return err; 67 return err;
65} 68}
66 69
67static int event__repipe_tracing_data(event_t *self, 70static int perf_event__repipe_tracing_data(union perf_event *event,
68 struct perf_session *session) 71 struct perf_session *session)
69{ 72{
70 int err; 73 int err;
71 74
72 event__repipe_synth(self, session); 75 perf_event__repipe_synth(event, session);
73 err = event__process_tracing_data(self, session); 76 err = perf_event__process_tracing_data(event, session);
74 77
75 return err; 78 return err;
76} 79}
@@ -109,8 +112,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
109 if (self->kernel) 112 if (self->kernel)
110 misc = PERF_RECORD_MISC_KERNEL; 113 misc = PERF_RECORD_MISC_KERNEL;
111 114
112 err = event__synthesize_build_id(self, misc, event__repipe, 115 err = perf_event__synthesize_build_id(self, misc, perf_event__repipe,
113 machine, session); 116 machine, session);
114 if (err) { 117 if (err) {
115 pr_err("Can't synthesize build_id event for %s\n", self->long_name); 118 pr_err("Can't synthesize build_id event for %s\n", self->long_name);
116 return -1; 119 return -1;
@@ -119,8 +122,9 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
119 return 0; 122 return 0;
120} 123}
121 124
122static int event__inject_buildid(event_t *event, struct sample_data *sample, 125static int perf_event__inject_buildid(union perf_event *event,
123 struct perf_session *session) 126 struct perf_sample *sample,
127 struct perf_session *session)
124{ 128{
125 struct addr_location al; 129 struct addr_location al;
126 struct thread *thread; 130 struct thread *thread;
@@ -155,24 +159,24 @@ static int event__inject_buildid(event_t *event, struct sample_data *sample,
155 } 159 }
156 160
157repipe: 161repipe:
158 event__repipe(event, sample, session); 162 perf_event__repipe(event, sample, session);
159 return 0; 163 return 0;
160} 164}
161 165
162struct perf_event_ops inject_ops = { 166struct perf_event_ops inject_ops = {
163 .sample = event__repipe, 167 .sample = perf_event__repipe,
164 .mmap = event__repipe, 168 .mmap = perf_event__repipe,
165 .comm = event__repipe, 169 .comm = perf_event__repipe,
166 .fork = event__repipe, 170 .fork = perf_event__repipe,
167 .exit = event__repipe, 171 .exit = perf_event__repipe,
168 .lost = event__repipe, 172 .lost = perf_event__repipe,
169 .read = event__repipe, 173 .read = perf_event__repipe,
170 .throttle = event__repipe, 174 .throttle = perf_event__repipe,
171 .unthrottle = event__repipe, 175 .unthrottle = perf_event__repipe,
172 .attr = event__repipe_synth, 176 .attr = perf_event__repipe_synth,
173 .event_type = event__repipe_synth, 177 .event_type = perf_event__repipe_synth,
174 .tracing_data = event__repipe_synth, 178 .tracing_data = perf_event__repipe_synth,
175 .build_id = event__repipe_synth, 179 .build_id = perf_event__repipe_synth,
176}; 180};
177 181
178extern volatile int session_done; 182extern volatile int session_done;
@@ -190,10 +194,10 @@ static int __cmd_inject(void)
190 signal(SIGINT, sig_handler); 194 signal(SIGINT, sig_handler);
191 195
192 if (inject_build_ids) { 196 if (inject_build_ids) {
193 inject_ops.sample = event__inject_buildid; 197 inject_ops.sample = perf_event__inject_buildid;
194 inject_ops.mmap = event__repipe_mmap; 198 inject_ops.mmap = perf_event__repipe_mmap;
195 inject_ops.fork = event__repipe_task; 199 inject_ops.fork = perf_event__repipe_task;
196 inject_ops.tracing_data = event__repipe_tracing_data; 200 inject_ops.tracing_data = perf_event__repipe_tracing_data;
197 } 201 }
198 202
199 session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops); 203 session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index d97256d65980..7f618f4e7b79 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -275,9 +275,8 @@ static void process_free_event(void *data,
275 s_alloc->alloc_cpu = -1; 275 s_alloc->alloc_cpu = -1;
276} 276}
277 277
278static void 278static void process_raw_event(union perf_event *raw_event __used, void *data,
279process_raw_event(event_t *raw_event __used, void *data, 279 int cpu, u64 timestamp, struct thread *thread)
280 int cpu, u64 timestamp, struct thread *thread)
281{ 280{
282 struct event *event; 281 struct event *event;
283 int type; 282 int type;
@@ -304,7 +303,8 @@ process_raw_event(event_t *raw_event __used, void *data,
304 } 303 }
305} 304}
306 305
307static int process_sample_event(event_t *event, struct sample_data *sample, 306static int process_sample_event(union perf_event *event,
307 struct perf_sample *sample,
308 struct perf_session *session) 308 struct perf_session *session)
309{ 309{
310 struct thread *thread = perf_session__findnew(session, event->ip.pid); 310 struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -325,7 +325,7 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
325 325
326static struct perf_event_ops event_ops = { 326static struct perf_event_ops event_ops = {
327 .sample = process_sample_event, 327 .sample = process_sample_event,
328 .comm = event__process_comm, 328 .comm = perf_event__process_comm,
329 .ordered_samples = true, 329 .ordered_samples = true,
330}; 330};
331 331
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index d88c6961274c..6313b6eb3ebb 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -5,6 +5,7 @@
5 * 5 *
6 * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de> 6 * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
7 * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 7 * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
8 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
8 */ 9 */
9#include "builtin.h" 10#include "builtin.h"
10 11
@@ -13,9 +14,47 @@
13#include "util/parse-events.h" 14#include "util/parse-events.h"
14#include "util/cache.h" 15#include "util/cache.h"
15 16
16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) 17int cmd_list(int argc, const char **argv, const char *prefix __used)
17{ 18{
18 setup_pager(); 19 setup_pager();
19 print_events(); 20
21 if (argc == 1)
22 print_events(NULL);
23 else {
24 int i;
25
26 for (i = 1; i < argc; ++i) {
27 if (i > 1)
28 putchar('\n');
29 if (strncmp(argv[i], "tracepoint", 10) == 0)
30 print_tracepoint_events(NULL, NULL);
31 else if (strcmp(argv[i], "hw") == 0 ||
32 strcmp(argv[i], "hardware") == 0)
33 print_events_type(PERF_TYPE_HARDWARE);
34 else if (strcmp(argv[i], "sw") == 0 ||
35 strcmp(argv[i], "software") == 0)
36 print_events_type(PERF_TYPE_SOFTWARE);
37 else if (strcmp(argv[i], "cache") == 0 ||
38 strcmp(argv[i], "hwcache") == 0)
39 print_hwcache_events(NULL);
40 else {
41 char *sep = strchr(argv[i], ':'), *s;
42 int sep_idx;
43
44 if (sep == NULL) {
45 print_events(argv[i]);
46 continue;
47 }
48 sep_idx = sep - argv[i];
49 s = strdup(argv[i]);
50 if (s == NULL)
51 return -1;
52
53 s[sep_idx] = '\0';
54 print_tracepoint_events(s, s + sep_idx + 1);
55 free(s);
56 }
57 }
58 }
20 return 0; 59 return 0;
21} 60}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2b36defc5d73..2e93f99b1480 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,14 +834,14 @@ static void dump_info(void)
834 die("Unknown type of information\n"); 834 die("Unknown type of information\n");
835} 835}
836 836
837static int process_sample_event(event_t *self, struct sample_data *sample, 837static int process_sample_event(union perf_event *event, struct perf_sample *sample,
838 struct perf_session *s) 838 struct perf_session *s)
839{ 839{
840 struct thread *thread = perf_session__findnew(s, sample->tid); 840 struct thread *thread = perf_session__findnew(s, sample->tid);
841 841
842 if (thread == NULL) { 842 if (thread == NULL) {
843 pr_debug("problem processing %d event, skipping it.\n", 843 pr_debug("problem processing %d event, skipping it.\n",
844 self->header.type); 844 event->header.type);
845 return -1; 845 return -1;
846 } 846 }
847 847
@@ -852,7 +852,7 @@ static int process_sample_event(event_t *self, struct sample_data *sample,
852 852
853static struct perf_event_ops eops = { 853static struct perf_event_ops eops = {
854 .sample = process_sample_event, 854 .sample = process_sample_event,
855 .comm = event__process_comm, 855 .comm = perf_event__process_comm,
856 .ordered_samples = true, 856 .ordered_samples = true,
857}; 857};
858 858
@@ -893,7 +893,7 @@ static const char * const report_usage[] = {
893 893
894static const struct option report_options[] = { 894static const struct option report_options[] = {
895 OPT_STRING('k', "key", &sort_key, "acquired", 895 OPT_STRING('k', "key", &sort_key, "acquired",
896 "key for sorting"), 896 "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"),
897 /* TODO: type */ 897 /* TODO: type */
898 OPT_END() 898 OPT_END()
899}; 899};
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index add163c9f0e7..2c0e64d0b4aa 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -36,6 +36,7 @@
36#include "builtin.h" 36#include "builtin.h"
37#include "util/util.h" 37#include "util/util.h"
38#include "util/strlist.h" 38#include "util/strlist.h"
39#include "util/strfilter.h"
39#include "util/symbol.h" 40#include "util/symbol.h"
40#include "util/debug.h" 41#include "util/debug.h"
41#include "util/debugfs.h" 42#include "util/debugfs.h"
@@ -43,6 +44,8 @@
43#include "util/probe-finder.h" 44#include "util/probe-finder.h"
44#include "util/probe-event.h" 45#include "util/probe-event.h"
45 46
47#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
48#define DEFAULT_FUNC_FILTER "!_*"
46#define MAX_PATH_LEN 256 49#define MAX_PATH_LEN 256
47 50
48/* Session management structure */ 51/* Session management structure */
@@ -52,6 +55,7 @@ static struct {
52 bool show_lines; 55 bool show_lines;
53 bool show_vars; 56 bool show_vars;
54 bool show_ext_vars; 57 bool show_ext_vars;
58 bool show_funcs;
55 bool mod_events; 59 bool mod_events;
56 int nevents; 60 int nevents;
57 struct perf_probe_event events[MAX_PROBES]; 61 struct perf_probe_event events[MAX_PROBES];
@@ -59,6 +63,7 @@ static struct {
59 struct line_range line_range; 63 struct line_range line_range;
60 const char *target_module; 64 const char *target_module;
61 int max_probe_points; 65 int max_probe_points;
66 struct strfilter *filter;
62} params; 67} params;
63 68
64/* Parse an event definition. Note that any error must die. */ 69/* Parse an event definition. Note that any error must die. */
@@ -157,6 +162,27 @@ static int opt_show_vars(const struct option *opt __used,
157} 162}
158#endif 163#endif
159 164
165static int opt_set_filter(const struct option *opt __used,
166 const char *str, int unset __used)
167{
168 const char *err;
169
170 if (str) {
171 pr_debug2("Set filter: %s\n", str);
172 if (params.filter)
173 strfilter__delete(params.filter);
174 params.filter = strfilter__new(str, &err);
175 if (!params.filter) {
176 pr_err("Filter parse error at %td.\n", err - str + 1);
177 pr_err("Source: \"%s\"\n", str);
178 pr_err(" %*c\n", (int)(err - str + 1), '^');
179 return -EINVAL;
180 }
181 }
182
183 return 0;
184}
185
160static const char * const probe_usage[] = { 186static const char * const probe_usage[] = {
161 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]", 187 "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
162 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", 188 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
@@ -221,6 +247,13 @@ static const struct option options[] = {
221 OPT__DRY_RUN(&probe_event_dry_run), 247 OPT__DRY_RUN(&probe_event_dry_run),
222 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 248 OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
223 "Set how many probe points can be found for a probe."), 249 "Set how many probe points can be found for a probe."),
250 OPT_BOOLEAN('F', "funcs", &params.show_funcs,
251 "Show potential probe-able functions."),
252 OPT_CALLBACK('\0', "filter", NULL,
253 "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
254 "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
255 "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)",
256 opt_set_filter),
224 OPT_END() 257 OPT_END()
225}; 258};
226 259
@@ -246,7 +279,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
246 params.max_probe_points = MAX_PROBES; 279 params.max_probe_points = MAX_PROBES;
247 280
248 if ((!params.nevents && !params.dellist && !params.list_events && 281 if ((!params.nevents && !params.dellist && !params.list_events &&
249 !params.show_lines)) 282 !params.show_lines && !params.show_funcs))
250 usage_with_options(probe_usage, options); 283 usage_with_options(probe_usage, options);
251 284
252 /* 285 /*
@@ -267,12 +300,41 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
267 pr_err(" Error: Don't use --list with --vars.\n"); 300 pr_err(" Error: Don't use --list with --vars.\n");
268 usage_with_options(probe_usage, options); 301 usage_with_options(probe_usage, options);
269 } 302 }
303 if (params.show_funcs) {
304 pr_err(" Error: Don't use --list with --funcs.\n");
305 usage_with_options(probe_usage, options);
306 }
270 ret = show_perf_probe_events(); 307 ret = show_perf_probe_events();
271 if (ret < 0) 308 if (ret < 0)
272 pr_err(" Error: Failed to show event list. (%d)\n", 309 pr_err(" Error: Failed to show event list. (%d)\n",
273 ret); 310 ret);
274 return ret; 311 return ret;
275 } 312 }
313 if (params.show_funcs) {
314 if (params.nevents != 0 || params.dellist) {
315 pr_err(" Error: Don't use --funcs with"
316 " --add/--del.\n");
317 usage_with_options(probe_usage, options);
318 }
319 if (params.show_lines) {
320 pr_err(" Error: Don't use --funcs with --line.\n");
321 usage_with_options(probe_usage, options);
322 }
323 if (params.show_vars) {
324 pr_err(" Error: Don't use --funcs with --vars.\n");
325 usage_with_options(probe_usage, options);
326 }
327 if (!params.filter)
328 params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
329 NULL);
330 ret = show_available_funcs(params.target_module,
331 params.filter);
332 strfilter__delete(params.filter);
333 if (ret < 0)
334 pr_err(" Error: Failed to show functions."
335 " (%d)\n", ret);
336 return ret;
337 }
276 338
277#ifdef DWARF_SUPPORT 339#ifdef DWARF_SUPPORT
278 if (params.show_lines) { 340 if (params.show_lines) {
@@ -297,10 +359,16 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
297 " --add/--del.\n"); 359 " --add/--del.\n");
298 usage_with_options(probe_usage, options); 360 usage_with_options(probe_usage, options);
299 } 361 }
362 if (!params.filter)
363 params.filter = strfilter__new(DEFAULT_VAR_FILTER,
364 NULL);
365
300 ret = show_available_vars(params.events, params.nevents, 366 ret = show_available_vars(params.events, params.nevents,
301 params.max_probe_points, 367 params.max_probe_points,
302 params.target_module, 368 params.target_module,
369 params.filter,
303 params.show_ext_vars); 370 params.show_ext_vars);
371 strfilter__delete(params.filter);
304 if (ret < 0) 372 if (ret < 0)
305 pr_err(" Error: Failed to show vars. (%d)\n", ret); 373 pr_err(" Error: Failed to show vars. (%d)\n", ret);
306 return ret; 374 return ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 60cac6f92e8b..6febcc168a8c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -18,11 +18,13 @@
18 18
19#include "util/header.h" 19#include "util/header.h"
20#include "util/event.h" 20#include "util/event.h"
21#include "util/evlist.h"
21#include "util/evsel.h" 22#include "util/evsel.h"
22#include "util/debug.h" 23#include "util/debug.h"
23#include "util/session.h" 24#include "util/session.h"
24#include "util/symbol.h" 25#include "util/symbol.h"
25#include "util/cpumap.h" 26#include "util/cpumap.h"
27#include "util/thread_map.h"
26 28
27#include <unistd.h> 29#include <unistd.h>
28#include <sched.h> 30#include <sched.h>
@@ -37,16 +39,14 @@ enum write_mode_t {
37 39
38static u64 user_interval = ULLONG_MAX; 40static u64 user_interval = ULLONG_MAX;
39static u64 default_interval = 0; 41static u64 default_interval = 0;
40static u64 sample_type;
41 42
42static struct cpu_map *cpus;
43static unsigned int page_size; 43static unsigned int page_size;
44static unsigned int mmap_pages = 128; 44static unsigned int mmap_pages = 128;
45static unsigned int user_freq = UINT_MAX; 45static unsigned int user_freq = UINT_MAX;
46static int freq = 1000; 46static int freq = 1000;
47static int output; 47static int output;
48static int pipe_output = 0; 48static int pipe_output = 0;
49static const char *output_name = "perf.data"; 49static const char *output_name = NULL;
50static int group = 0; 50static int group = 0;
51static int realtime_prio = 0; 51static int realtime_prio = 0;
52static bool nodelay = false; 52static bool nodelay = false;
@@ -55,7 +55,6 @@ static bool sample_id_all_avail = true;
55static bool system_wide = false; 55static bool system_wide = false;
56static pid_t target_pid = -1; 56static pid_t target_pid = -1;
57static pid_t target_tid = -1; 57static pid_t target_tid = -1;
58static struct thread_map *threads;
59static pid_t child_pid = -1; 58static pid_t child_pid = -1;
60static bool no_inherit = false; 59static bool no_inherit = false;
61static enum write_mode_t write_mode = WRITE_FORCE; 60static enum write_mode_t write_mode = WRITE_FORCE;
@@ -66,51 +65,17 @@ static bool sample_address = false;
66static bool sample_time = false; 65static bool sample_time = false;
67static bool no_buildid = false; 66static bool no_buildid = false;
68static bool no_buildid_cache = false; 67static bool no_buildid_cache = false;
68static struct perf_evlist *evsel_list;
69 69
70static long samples = 0; 70static long samples = 0;
71static u64 bytes_written = 0; 71static u64 bytes_written = 0;
72 72
73static struct pollfd *event_array;
74
75static int nr_poll = 0;
76static int nr_cpu = 0;
77
78static int file_new = 1; 73static int file_new = 1;
79static off_t post_processing_offset; 74static off_t post_processing_offset;
80 75
81static struct perf_session *session; 76static struct perf_session *session;
82static const char *cpu_list; 77static const char *cpu_list;
83 78
84struct mmap_data {
85 void *base;
86 unsigned int mask;
87 unsigned int prev;
88};
89
90static struct mmap_data mmap_array[MAX_NR_CPUS];
91
92static unsigned long mmap_read_head(struct mmap_data *md)
93{
94 struct perf_event_mmap_page *pc = md->base;
95 long head;
96
97 head = pc->data_head;
98 rmb();
99
100 return head;
101}
102
103static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
104{
105 struct perf_event_mmap_page *pc = md->base;
106
107 /*
108 * ensure all reads are done before we write the tail out.
109 */
110 /* mb(); */
111 pc->data_tail = tail;
112}
113
114static void advance_output(size_t size) 79static void advance_output(size_t size)
115{ 80{
116 bytes_written += size; 81 bytes_written += size;
@@ -131,42 +96,26 @@ static void write_output(void *buf, size_t size)
131 } 96 }
132} 97}
133 98
134static int process_synthesized_event(event_t *event, 99static int process_synthesized_event(union perf_event *event,
135 struct sample_data *sample __used, 100 struct perf_sample *sample __used,
136 struct perf_session *self __used) 101 struct perf_session *self __used)
137{ 102{
138 write_output(event, event->header.size); 103 write_output(event, event->header.size);
139 return 0; 104 return 0;
140} 105}
141 106
142static void mmap_read(struct mmap_data *md) 107static void mmap_read(struct perf_mmap *md)
143{ 108{
144 unsigned int head = mmap_read_head(md); 109 unsigned int head = perf_mmap__read_head(md);
145 unsigned int old = md->prev; 110 unsigned int old = md->prev;
146 unsigned char *data = md->base + page_size; 111 unsigned char *data = md->base + page_size;
147 unsigned long size; 112 unsigned long size;
148 void *buf; 113 void *buf;
149 int diff;
150 114
151 /* 115 if (old == head)
152 * If we're further behind than half the buffer, there's a chance 116 return;
153 * the writer will bite our tail and mess up the samples under us.
154 *
155 * If we somehow ended up ahead of the head, we got messed up.
156 *
157 * In either case, truncate and restart at head.
158 */
159 diff = head - old;
160 if (diff < 0) {
161 fprintf(stderr, "WARNING: failed to keep up with mmap data\n");
162 /*
163 * head points to a known good entry, start there.
164 */
165 old = head;
166 }
167 117
168 if (old != head) 118 samples++;
169 samples++;
170 119
171 size = head - old; 120 size = head - old;
172 121
@@ -185,7 +134,7 @@ static void mmap_read(struct mmap_data *md)
185 write_output(buf, size); 134 write_output(buf, size);
186 135
187 md->prev = old; 136 md->prev = old;
188 mmap_write_tail(md, old); 137 perf_mmap__write_tail(md, old);
189} 138}
190 139
191static volatile int done = 0; 140static volatile int done = 0;
@@ -209,53 +158,10 @@ static void sig_atexit(void)
209 kill(getpid(), signr); 158 kill(getpid(), signr);
210} 159}
211 160
212static int group_fd; 161static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
213
214static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
215{
216 struct perf_header_attr *h_attr;
217
218 if (nr < session->header.attrs) {
219 h_attr = session->header.attr[nr];
220 } else {
221 h_attr = perf_header_attr__new(a);
222 if (h_attr != NULL)
223 if (perf_header__add_attr(&session->header, h_attr) < 0) {
224 perf_header_attr__delete(h_attr);
225 h_attr = NULL;
226 }
227 }
228
229 return h_attr;
230}
231
232static void create_counter(struct perf_evsel *evsel, int cpu)
233{ 162{
234 char *filter = evsel->filter;
235 struct perf_event_attr *attr = &evsel->attr; 163 struct perf_event_attr *attr = &evsel->attr;
236 struct perf_header_attr *h_attr;
237 int track = !evsel->idx; /* only the first counter needs these */ 164 int track = !evsel->idx; /* only the first counter needs these */
238 int thread_index;
239 int ret;
240 struct {
241 u64 count;
242 u64 time_enabled;
243 u64 time_running;
244 u64 id;
245 } read_data;
246 /*
247 * Check if parse_single_tracepoint_event has already asked for
248 * PERF_SAMPLE_TIME.
249 *
250 * XXX this is kludgy but short term fix for problems introduced by
251 * eac23d1c that broke 'perf script' by having different sample_types
252 * when using multiple tracepoint events when we use a perf binary
253 * that tries to use sample_id_all on an older kernel.
254 *
255 * We need to move counter creation to perf_session, support
256 * different sample_types, etc.
257 */
258 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
259 165
260 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 166 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
261 PERF_FORMAT_TOTAL_TIME_RUNNING | 167 PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -263,7 +169,7 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
263 169
264 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; 170 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
265 171
266 if (nr_counters > 1) 172 if (evlist->nr_entries > 1)
267 attr->sample_type |= PERF_SAMPLE_ID; 173 attr->sample_type |= PERF_SAMPLE_ID;
268 174
269 /* 175 /*
@@ -315,19 +221,58 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
315 221
316 attr->mmap = track; 222 attr->mmap = track;
317 attr->comm = track; 223 attr->comm = track;
318 attr->inherit = !no_inherit; 224
319 if (target_pid == -1 && target_tid == -1 && !system_wide) { 225 if (target_pid == -1 && target_tid == -1 && !system_wide) {
320 attr->disabled = 1; 226 attr->disabled = 1;
321 attr->enable_on_exec = 1; 227 attr->enable_on_exec = 1;
322 } 228 }
323retry_sample_id: 229}
324 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
325 230
326 for (thread_index = 0; thread_index < threads->nr; thread_index++) { 231static bool perf_evlist__equal(struct perf_evlist *evlist,
327try_again: 232 struct perf_evlist *other)
328 FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0); 233{
234 struct perf_evsel *pos, *pair;
235
236 if (evlist->nr_entries != other->nr_entries)
237 return false;
238
239 pair = list_entry(other->entries.next, struct perf_evsel, node);
329 240
330 if (FD(evsel, nr_cpu, thread_index) < 0) { 241 list_for_each_entry(pos, &evlist->entries, node) {
242 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
243 return false;
244 pair = list_entry(pair->node.next, struct perf_evsel, node);
245 }
246
247 return true;
248}
249
250static void open_counters(struct perf_evlist *evlist)
251{
252 struct perf_evsel *pos;
253
254 list_for_each_entry(pos, &evlist->entries, node) {
255 struct perf_event_attr *attr = &pos->attr;
256 /*
257 * Check if parse_single_tracepoint_event has already asked for
258 * PERF_SAMPLE_TIME.
259 *
260 * XXX this is kludgy but short term fix for problems introduced by
261 * eac23d1c that broke 'perf script' by having different sample_types
262 * when using multiple tracepoint events when we use a perf binary
263 * that tries to use sample_id_all on an older kernel.
264 *
265 * We need to move counter creation to perf_session, support
266 * different sample_types, etc.
267 */
268 bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
269
270 config_attr(pos, evlist);
271retry_sample_id:
272 attr->sample_id_all = sample_id_all_avail ? 1 : 0;
273try_again:
274 if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
275 !no_inherit) < 0) {
331 int err = errno; 276 int err = errno;
332 277
333 if (err == EPERM || err == EACCES) 278 if (err == EPERM || err == EACCES)
@@ -364,7 +309,7 @@ try_again:
364 } 309 }
365 printf("\n"); 310 printf("\n");
366 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", 311 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
367 FD(evsel, nr_cpu, thread_index), strerror(err)); 312 err, strerror(err));
368 313
369#if defined(__i386__) || defined(__x86_64__) 314#if defined(__i386__) || defined(__x86_64__)
370 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) 315 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
@@ -375,90 +320,28 @@ try_again:
375#endif 320#endif
376 321
377 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 322 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
378 exit(-1);
379 } 323 }
324 }
380 325
381 h_attr = get_header_attr(attr, evsel->idx); 326 if (perf_evlist__set_filters(evlist)) {
382 if (h_attr == NULL) 327 error("failed to set filter with %d (%s)\n", errno,
383 die("nomem\n"); 328 strerror(errno));
329 exit(-1);
330 }
384 331
385 if (!file_new) { 332 if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
386 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { 333 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
387 fprintf(stderr, "incompatible append\n");
388 exit(-1);
389 }
390 }
391 334
392 if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) { 335 if (file_new)
393 perror("Unable to read perf file descriptor"); 336 session->evlist = evlist;
337 else {
338 if (!perf_evlist__equal(session->evlist, evlist)) {
339 fprintf(stderr, "incompatible append\n");
394 exit(-1); 340 exit(-1);
395 } 341 }
342 }
396 343
397 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { 344 perf_session__update_sample_type(session);
398 pr_warning("Not enough memory to add id\n");
399 exit(-1);
400 }
401
402 assert(FD(evsel, nr_cpu, thread_index) >= 0);
403 fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
404
405 /*
406 * First counter acts as the group leader:
407 */
408 if (group && group_fd == -1)
409 group_fd = FD(evsel, nr_cpu, thread_index);
410
411 if (evsel->idx || thread_index) {
412 struct perf_evsel *first;
413 first = list_entry(evsel_list.next, struct perf_evsel, node);
414 ret = ioctl(FD(evsel, nr_cpu, thread_index),
415 PERF_EVENT_IOC_SET_OUTPUT,
416 FD(first, nr_cpu, 0));
417 if (ret) {
418 error("failed to set output: %d (%s)\n", errno,
419 strerror(errno));
420 exit(-1);
421 }
422 } else {
423 mmap_array[nr_cpu].prev = 0;
424 mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
425 mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
426 PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
427 if (mmap_array[nr_cpu].base == MAP_FAILED) {
428 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
429 exit(-1);
430 }
431
432 event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
433 event_array[nr_poll].events = POLLIN;
434 nr_poll++;
435 }
436
437 if (filter != NULL) {
438 ret = ioctl(FD(evsel, nr_cpu, thread_index),
439 PERF_EVENT_IOC_SET_FILTER, filter);
440 if (ret) {
441 error("failed to set filter with %d (%s)\n", errno,
442 strerror(errno));
443 exit(-1);
444 }
445 }
446 }
447
448 if (!sample_type)
449 sample_type = attr->sample_type;
450}
451
452static void open_counters(int cpu)
453{
454 struct perf_evsel *pos;
455
456 group_fd = -1;
457
458 list_for_each_entry(pos, &evsel_list, node)
459 create_counter(pos, cpu);
460
461 nr_cpu++;
462} 345}
463 346
464static int process_buildids(void) 347static int process_buildids(void)
@@ -481,14 +364,14 @@ static void atexit_header(void)
481 364
482 if (!no_buildid) 365 if (!no_buildid)
483 process_buildids(); 366 process_buildids();
484 perf_header__write(&session->header, output, true); 367 perf_session__write_header(session, evsel_list, output, true);
485 perf_session__delete(session); 368 perf_session__delete(session);
486 perf_evsel_list__delete(); 369 perf_evlist__delete(evsel_list);
487 symbol__exit(); 370 symbol__exit();
488 } 371 }
489} 372}
490 373
491static void event__synthesize_guest_os(struct machine *machine, void *data) 374static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
492{ 375{
493 int err; 376 int err;
494 struct perf_session *psession = data; 377 struct perf_session *psession = data;
@@ -504,8 +387,8 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
504 *method is used to avoid symbol missing when the first addr is 387 *method is used to avoid symbol missing when the first addr is
505 *in module instead of in guest kernel. 388 *in module instead of in guest kernel.
506 */ 389 */
507 err = event__synthesize_modules(process_synthesized_event, 390 err = perf_event__synthesize_modules(process_synthesized_event,
508 psession, machine); 391 psession, machine);
509 if (err < 0) 392 if (err < 0)
510 pr_err("Couldn't record guest kernel [%d]'s reference" 393 pr_err("Couldn't record guest kernel [%d]'s reference"
511 " relocation symbol.\n", machine->pid); 394 " relocation symbol.\n", machine->pid);
@@ -514,11 +397,12 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
514 * We use _stext for guest kernel because guest kernel's /proc/kallsyms 397 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
515 * have no _text sometimes. 398 * have no _text sometimes.
516 */ 399 */
517 err = event__synthesize_kernel_mmap(process_synthesized_event, 400 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
518 psession, machine, "_text"); 401 psession, machine, "_text");
519 if (err < 0) 402 if (err < 0)
520 err = event__synthesize_kernel_mmap(process_synthesized_event, 403 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
521 psession, machine, "_stext"); 404 psession, machine,
405 "_stext");
522 if (err < 0) 406 if (err < 0)
523 pr_err("Couldn't record guest kernel [%d]'s reference" 407 pr_err("Couldn't record guest kernel [%d]'s reference"
524 " relocation symbol.\n", machine->pid); 408 " relocation symbol.\n", machine->pid);
@@ -533,9 +417,9 @@ static void mmap_read_all(void)
533{ 417{
534 int i; 418 int i;
535 419
536 for (i = 0; i < nr_cpu; i++) { 420 for (i = 0; i < evsel_list->cpus->nr; i++) {
537 if (mmap_array[i].base) 421 if (evsel_list->mmap[i].base)
538 mmap_read(&mmap_array[i]); 422 mmap_read(&evsel_list->mmap[i]);
539 } 423 }
540 424
541 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO)) 425 if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
@@ -566,18 +450,26 @@ static int __cmd_record(int argc, const char **argv)
566 exit(-1); 450 exit(-1);
567 } 451 }
568 452
569 if (!strcmp(output_name, "-")) 453 if (!output_name) {
570 pipe_output = 1; 454 if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
571 else if (!stat(output_name, &st) && st.st_size) { 455 pipe_output = 1;
572 if (write_mode == WRITE_FORCE) { 456 else
573 char oldname[PATH_MAX]; 457 output_name = "perf.data";
574 snprintf(oldname, sizeof(oldname), "%s.old", 458 }
575 output_name); 459 if (output_name) {
576 unlink(oldname); 460 if (!strcmp(output_name, "-"))
577 rename(output_name, oldname); 461 pipe_output = 1;
462 else if (!stat(output_name, &st) && st.st_size) {
463 if (write_mode == WRITE_FORCE) {
464 char oldname[PATH_MAX];
465 snprintf(oldname, sizeof(oldname), "%s.old",
466 output_name);
467 unlink(oldname);
468 rename(output_name, oldname);
469 }
470 } else if (write_mode == WRITE_APPEND) {
471 write_mode = WRITE_FORCE;
578 } 472 }
579 } else if (write_mode == WRITE_APPEND) {
580 write_mode = WRITE_FORCE;
581 } 473 }
582 474
583 flags = O_CREAT|O_RDWR; 475 flags = O_CREAT|O_RDWR;
@@ -606,19 +498,14 @@ static int __cmd_record(int argc, const char **argv)
606 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 498 perf_header__set_feat(&session->header, HEADER_BUILD_ID);
607 499
608 if (!file_new) { 500 if (!file_new) {
609 err = perf_header__read(session, output); 501 err = perf_session__read_header(session, output);
610 if (err < 0) 502 if (err < 0)
611 goto out_delete_session; 503 goto out_delete_session;
612 } 504 }
613 505
614 if (have_tracepoints(&evsel_list)) 506 if (have_tracepoints(&evsel_list->entries))
615 perf_header__set_feat(&session->header, HEADER_TRACE_INFO); 507 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
616 508
617 /*
618 * perf_session__delete(session) will be called at atexit_header()
619 */
620 atexit(atexit_header);
621
622 if (forks) { 509 if (forks) {
623 child_pid = fork(); 510 child_pid = fork();
624 if (child_pid < 0) { 511 if (child_pid < 0) {
@@ -659,7 +546,7 @@ static int __cmd_record(int argc, const char **argv)
659 } 546 }
660 547
661 if (!system_wide && target_tid == -1 && target_pid == -1) 548 if (!system_wide && target_tid == -1 && target_pid == -1)
662 threads->map[0] = child_pid; 549 evsel_list->threads->map[0] = child_pid;
663 550
664 close(child_ready_pipe[1]); 551 close(child_ready_pipe[1]);
665 close(go_pipe[0]); 552 close(go_pipe[0]);
@@ -673,46 +560,42 @@ static int __cmd_record(int argc, const char **argv)
673 close(child_ready_pipe[0]); 560 close(child_ready_pipe[0]);
674 } 561 }
675 562
676 if (!system_wide && no_inherit && !cpu_list) { 563 open_counters(evsel_list);
677 open_counters(-1);
678 } else {
679 for (i = 0; i < cpus->nr; i++)
680 open_counters(cpus->map[i]);
681 }
682 564
683 perf_session__set_sample_type(session, sample_type); 565 /*
566 * perf_session__delete(session) will be called at atexit_header()
567 */
568 atexit(atexit_header);
684 569
685 if (pipe_output) { 570 if (pipe_output) {
686 err = perf_header__write_pipe(output); 571 err = perf_header__write_pipe(output);
687 if (err < 0) 572 if (err < 0)
688 return err; 573 return err;
689 } else if (file_new) { 574 } else if (file_new) {
690 err = perf_header__write(&session->header, output, false); 575 err = perf_session__write_header(session, evsel_list,
576 output, false);
691 if (err < 0) 577 if (err < 0)
692 return err; 578 return err;
693 } 579 }
694 580
695 post_processing_offset = lseek(output, 0, SEEK_CUR); 581 post_processing_offset = lseek(output, 0, SEEK_CUR);
696 582
697 perf_session__set_sample_id_all(session, sample_id_all_avail);
698
699 if (pipe_output) { 583 if (pipe_output) {
700 err = event__synthesize_attrs(&session->header, 584 err = perf_session__synthesize_attrs(session,
701 process_synthesized_event, 585 process_synthesized_event);
702 session);
703 if (err < 0) { 586 if (err < 0) {
704 pr_err("Couldn't synthesize attrs.\n"); 587 pr_err("Couldn't synthesize attrs.\n");
705 return err; 588 return err;
706 } 589 }
707 590
708 err = event__synthesize_event_types(process_synthesized_event, 591 err = perf_event__synthesize_event_types(process_synthesized_event,
709 session); 592 session);
710 if (err < 0) { 593 if (err < 0) {
711 pr_err("Couldn't synthesize event_types.\n"); 594 pr_err("Couldn't synthesize event_types.\n");
712 return err; 595 return err;
713 } 596 }
714 597
715 if (have_tracepoints(&evsel_list)) { 598 if (have_tracepoints(&evsel_list->entries)) {
716 /* 599 /*
717 * FIXME err <= 0 here actually means that 600 * FIXME err <= 0 here actually means that
718 * there were no tracepoints so its not really 601 * there were no tracepoints so its not really
@@ -721,9 +604,9 @@ static int __cmd_record(int argc, const char **argv)
721 * return this more properly and also 604 * return this more properly and also
722 * propagate errors that now are calling die() 605 * propagate errors that now are calling die()
723 */ 606 */
724 err = event__synthesize_tracing_data(output, &evsel_list, 607 err = perf_event__synthesize_tracing_data(output, evsel_list,
725 process_synthesized_event, 608 process_synthesized_event,
726 session); 609 session);
727 if (err <= 0) { 610 if (err <= 0) {
728 pr_err("Couldn't record tracing data.\n"); 611 pr_err("Couldn't record tracing data.\n");
729 return err; 612 return err;
@@ -738,31 +621,34 @@ static int __cmd_record(int argc, const char **argv)
738 return -1; 621 return -1;
739 } 622 }
740 623
741 err = event__synthesize_kernel_mmap(process_synthesized_event, 624 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
742 session, machine, "_text"); 625 session, machine, "_text");
743 if (err < 0) 626 if (err < 0)
744 err = event__synthesize_kernel_mmap(process_synthesized_event, 627 err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
745 session, machine, "_stext"); 628 session, machine, "_stext");
746 if (err < 0) 629 if (err < 0)
747 pr_err("Couldn't record kernel reference relocation symbol\n" 630 pr_err("Couldn't record kernel reference relocation symbol\n"
748 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 631 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
749 "Check /proc/kallsyms permission or run as root.\n"); 632 "Check /proc/kallsyms permission or run as root.\n");
750 633
751 err = event__synthesize_modules(process_synthesized_event, 634 err = perf_event__synthesize_modules(process_synthesized_event,
752 session, machine); 635 session, machine);
753 if (err < 0) 636 if (err < 0)
754 pr_err("Couldn't record kernel module information.\n" 637 pr_err("Couldn't record kernel module information.\n"
755 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" 638 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
756 "Check /proc/modules permission or run as root.\n"); 639 "Check /proc/modules permission or run as root.\n");
757 640
758 if (perf_guest) 641 if (perf_guest)
759 perf_session__process_machines(session, event__synthesize_guest_os); 642 perf_session__process_machines(session,
643 perf_event__synthesize_guest_os);
760 644
761 if (!system_wide) 645 if (!system_wide)
762 event__synthesize_thread_map(threads, process_synthesized_event, 646 perf_event__synthesize_thread_map(evsel_list->threads,
763 session); 647 process_synthesized_event,
648 session);
764 else 649 else
765 event__synthesize_threads(process_synthesized_event, session); 650 perf_event__synthesize_threads(process_synthesized_event,
651 session);
766 652
767 if (realtime_prio) { 653 if (realtime_prio) {
768 struct sched_param param; 654 struct sched_param param;
@@ -789,17 +675,17 @@ static int __cmd_record(int argc, const char **argv)
789 if (hits == samples) { 675 if (hits == samples) {
790 if (done) 676 if (done)
791 break; 677 break;
792 err = poll(event_array, nr_poll, -1); 678 err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
793 waking++; 679 waking++;
794 } 680 }
795 681
796 if (done) { 682 if (done) {
797 for (i = 0; i < nr_cpu; i++) { 683 for (i = 0; i < evsel_list->cpus->nr; i++) {
798 struct perf_evsel *pos; 684 struct perf_evsel *pos;
799 685
800 list_for_each_entry(pos, &evsel_list, node) { 686 list_for_each_entry(pos, &evsel_list->entries, node) {
801 for (thread = 0; 687 for (thread = 0;
802 thread < threads->nr; 688 thread < evsel_list->threads->nr;
803 thread++) 689 thread++)
804 ioctl(FD(pos, i, thread), 690 ioctl(FD(pos, i, thread),
805 PERF_EVENT_IOC_DISABLE); 691 PERF_EVENT_IOC_DISABLE);
@@ -838,10 +724,10 @@ static const char * const record_usage[] = {
838static bool force, append_file; 724static bool force, append_file;
839 725
840const struct option record_options[] = { 726const struct option record_options[] = {
841 OPT_CALLBACK('e', "event", NULL, "event", 727 OPT_CALLBACK('e', "event", &evsel_list, "event",
842 "event selector. use 'perf list' to list available events", 728 "event selector. use 'perf list' to list available events",
843 parse_events), 729 parse_events),
844 OPT_CALLBACK(0, "filter", NULL, "filter", 730 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
845 "event filter", parse_filter), 731 "event filter", parse_filter),
846 OPT_INTEGER('p', "pid", &target_pid, 732 OPT_INTEGER('p', "pid", &target_pid,
847 "record events on existing process id"), 733 "record events on existing process id"),
@@ -884,6 +770,9 @@ const struct option record_options[] = {
884 "do not update the buildid cache"), 770 "do not update the buildid cache"),
885 OPT_BOOLEAN('B', "no-buildid", &no_buildid, 771 OPT_BOOLEAN('B', "no-buildid", &no_buildid,
886 "do not collect buildids in perf.data"), 772 "do not collect buildids in perf.data"),
773 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
774 "monitor event in cgroup name only",
775 parse_cgroups),
887 OPT_END() 776 OPT_END()
888}; 777};
889 778
@@ -892,6 +781,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
892 int err = -ENOMEM; 781 int err = -ENOMEM;
893 struct perf_evsel *pos; 782 struct perf_evsel *pos;
894 783
784 evsel_list = perf_evlist__new(NULL, NULL);
785 if (evsel_list == NULL)
786 return -ENOMEM;
787
895 argc = parse_options(argc, argv, record_options, record_usage, 788 argc = parse_options(argc, argv, record_options, record_usage,
896 PARSE_OPT_STOP_AT_NON_OPTION); 789 PARSE_OPT_STOP_AT_NON_OPTION);
897 if (!argc && target_pid == -1 && target_tid == -1 && 790 if (!argc && target_pid == -1 && target_tid == -1 &&
@@ -908,12 +801,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
908 write_mode = WRITE_FORCE; 801 write_mode = WRITE_FORCE;
909 } 802 }
910 803
804 if (nr_cgroups && !system_wide) {
805 fprintf(stderr, "cgroup monitoring only available in"
806 " system-wide mode\n");
807 usage_with_options(record_usage, record_options);
808 }
809
911 symbol__init(); 810 symbol__init();
912 811
913 if (no_buildid_cache || no_buildid) 812 if (no_buildid_cache || no_buildid)
914 disable_buildid_cache(); 813 disable_buildid_cache();
915 814
916 if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) { 815 if (evsel_list->nr_entries == 0 &&
816 perf_evlist__add_default(evsel_list) < 0) {
917 pr_err("Not enough memory for event selector list\n"); 817 pr_err("Not enough memory for event selector list\n");
918 goto out_symbol_exit; 818 goto out_symbol_exit;
919 } 819 }
@@ -921,27 +821,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
921 if (target_pid != -1) 821 if (target_pid != -1)
922 target_tid = target_pid; 822 target_tid = target_pid;
923 823
924 threads = thread_map__new(target_pid, target_tid); 824 if (perf_evlist__create_maps(evsel_list, target_pid,
925 if (threads == NULL) { 825 target_tid, cpu_list) < 0)
926 pr_err("Problems finding threads of monitor\n");
927 usage_with_options(record_usage, record_options); 826 usage_with_options(record_usage, record_options);
928 }
929 827
930 cpus = cpu_map__new(cpu_list); 828 list_for_each_entry(pos, &evsel_list->entries, node) {
931 if (cpus == NULL) { 829 if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
932 perror("failed to parse CPUs map"); 830 evsel_list->threads->nr) < 0)
933 return -1;
934 }
935
936 list_for_each_entry(pos, &evsel_list, node) {
937 if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
938 goto out_free_fd; 831 goto out_free_fd;
939 if (perf_header__push_event(pos->attr.config, event_name(pos))) 832 if (perf_header__push_event(pos->attr.config, event_name(pos)))
940 goto out_free_fd; 833 goto out_free_fd;
941 } 834 }
942 event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS * 835
943 MAX_COUNTERS * threads->nr)); 836 if (perf_evlist__alloc_pollfd(evsel_list) < 0)
944 if (!event_array)
945 goto out_free_fd; 837 goto out_free_fd;
946 838
947 if (user_interval != ULLONG_MAX) 839 if (user_interval != ULLONG_MAX)
@@ -959,16 +851,12 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
959 } else { 851 } else {
960 fprintf(stderr, "frequency and count are zero, aborting\n"); 852 fprintf(stderr, "frequency and count are zero, aborting\n");
961 err = -EINVAL; 853 err = -EINVAL;
962 goto out_free_event_array; 854 goto out_free_fd;
963 } 855 }
964 856
965 err = __cmd_record(argc, argv); 857 err = __cmd_record(argc, argv);
966
967out_free_event_array:
968 free(event_array);
969out_free_fd: 858out_free_fd:
970 thread_map__delete(threads); 859 perf_evlist__delete_maps(evsel_list);
971 threads = NULL;
972out_symbol_exit: 860out_symbol_exit:
973 symbol__exit(); 861 symbol__exit();
974 return err; 862 return err;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c27e31f289e6..b1b82009ab9b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -9,6 +9,7 @@
9 9
10#include "util/util.h" 10#include "util/util.h"
11 11
12#include "util/annotate.h"
12#include "util/color.h" 13#include "util/color.h"
13#include <linux/list.h> 14#include <linux/list.h>
14#include "util/cache.h" 15#include "util/cache.h"
@@ -20,6 +21,8 @@
20 21
21#include "perf.h" 22#include "perf.h"
22#include "util/debug.h" 23#include "util/debug.h"
24#include "util/evlist.h"
25#include "util/evsel.h"
23#include "util/header.h" 26#include "util/header.h"
24#include "util/session.h" 27#include "util/session.h"
25 28
@@ -43,120 +46,79 @@ static const char default_pretty_printing_style[] = "normal";
43static const char *pretty_printing_style = default_pretty_printing_style; 46static const char *pretty_printing_style = default_pretty_printing_style;
44 47
45static char callchain_default_opt[] = "fractal,0.5"; 48static char callchain_default_opt[] = "fractal,0.5";
49static symbol_filter_t annotate_init;
46 50
47static struct hists *perf_session__hists_findnew(struct perf_session *self, 51static int perf_session__add_hist_entry(struct perf_session *session,
48 u64 event_stream, u32 type,
49 u64 config)
50{
51 struct rb_node **p = &self->hists_tree.rb_node;
52 struct rb_node *parent = NULL;
53 struct hists *iter, *new;
54
55 while (*p != NULL) {
56 parent = *p;
57 iter = rb_entry(parent, struct hists, rb_node);
58 if (iter->config == config)
59 return iter;
60
61
62 if (config > iter->config)
63 p = &(*p)->rb_right;
64 else
65 p = &(*p)->rb_left;
66 }
67
68 new = malloc(sizeof(struct hists));
69 if (new == NULL)
70 return NULL;
71 memset(new, 0, sizeof(struct hists));
72 new->event_stream = event_stream;
73 new->config = config;
74 new->type = type;
75 rb_link_node(&new->rb_node, parent, p);
76 rb_insert_color(&new->rb_node, &self->hists_tree);
77 return new;
78}
79
80static int perf_session__add_hist_entry(struct perf_session *self,
81 struct addr_location *al, 52 struct addr_location *al,
82 struct sample_data *data) 53 struct perf_sample *sample)
83{ 54{
84 struct map_symbol *syms = NULL;
85 struct symbol *parent = NULL; 55 struct symbol *parent = NULL;
86 int err = -ENOMEM; 56 int err = 0;
87 struct hist_entry *he; 57 struct hist_entry *he;
88 struct hists *hists; 58 struct perf_evsel *evsel;
89 struct perf_event_attr *attr; 59
90 60 if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
91 if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) { 61 err = perf_session__resolve_callchain(session, al->thread,
92 syms = perf_session__resolve_callchain(self, al->thread, 62 sample->callchain, &parent);
93 data->callchain, &parent); 63 if (err)
94 if (syms == NULL) 64 return err;
95 return -ENOMEM;
96 } 65 }
97 66
98 attr = perf_header__find_attr(data->id, &self->header); 67 evsel = perf_evlist__id2evsel(session->evlist, sample->id);
99 if (attr) 68 if (evsel == NULL) {
100 hists = perf_session__hists_findnew(self, data->id, attr->type, attr->config); 69 /*
101 else 70 * FIXME: Propagate this back, but at least we're in a builtin,
102 hists = perf_session__hists_findnew(self, data->id, 0, 0); 71 * where exit() is allowed. ;-)
103 if (hists == NULL) 72 */
104 goto out_free_syms; 73 ui__warning("Invalid %s file, contains samples with id %" PRIu64 " not in "
105 he = __hists__add_entry(hists, al, parent, data->period); 74 "its header!\n", input_name, sample->id);
75 exit_browser(0);
76 exit(1);
77 }
78
79 he = __hists__add_entry(&evsel->hists, al, parent, sample->period);
106 if (he == NULL) 80 if (he == NULL)
107 goto out_free_syms; 81 return -ENOMEM;
108 err = 0; 82
109 if (symbol_conf.use_callchain) { 83 if (symbol_conf.use_callchain) {
110 err = callchain_append(he->callchain, data->callchain, syms, 84 err = callchain_append(he->callchain, &session->callchain_cursor,
111 data->period); 85 sample->period);
112 if (err) 86 if (err)
113 goto out_free_syms; 87 return err;
114 } 88 }
115 /* 89 /*
116 * Only in the newt browser we are doing integrated annotation, 90 * Only in the newt browser we are doing integrated annotation,
117 * so we don't allocated the extra space needed because the stdio 91 * so we don't allocated the extra space needed because the stdio
118 * code will not use it. 92 * code will not use it.
119 */ 93 */
120 if (use_browser > 0) 94 if (al->sym != NULL && use_browser > 0) {
121 err = hist_entry__inc_addr_samples(he, al->addr); 95 struct annotation *notes = symbol__annotation(he->ms.sym);
122out_free_syms:
123 free(syms);
124 return err;
125}
126 96
127static int add_event_total(struct perf_session *session, 97 assert(evsel != NULL);
128 struct sample_data *data,
129 struct perf_event_attr *attr)
130{
131 struct hists *hists;
132 98
133 if (attr) 99 err = -ENOMEM;
134 hists = perf_session__hists_findnew(session, data->id, 100 if (notes->src == NULL &&
135 attr->type, attr->config); 101 symbol__alloc_hist(he->ms.sym, session->evlist->nr_entries) < 0)
136 else 102 goto out;
137 hists = perf_session__hists_findnew(session, data->id, 0, 0);
138 103
139 if (!hists) 104 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
140 return -ENOMEM; 105 }
141 106
142 hists->stats.total_period += data->period; 107 evsel->hists.stats.total_period += sample->period;
143 /* 108 hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
144 * FIXME: add_event_total should be moved from here to 109out:
145 * perf_session__process_event so that the proper hist is passed to 110 return err;
146 * the event_op methods.
147 */
148 hists__inc_nr_events(hists, PERF_RECORD_SAMPLE);
149 session->hists.stats.total_period += data->period;
150 return 0;
151} 111}
152 112
153static int process_sample_event(event_t *event, struct sample_data *sample, 113
114static int process_sample_event(union perf_event *event,
115 struct perf_sample *sample,
154 struct perf_session *session) 116 struct perf_session *session)
155{ 117{
156 struct addr_location al; 118 struct addr_location al;
157 struct perf_event_attr *attr;
158 119
159 if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) { 120 if (perf_event__preprocess_sample(event, session, &al, sample,
121 annotate_init) < 0) {
160 fprintf(stderr, "problem processing %d event, skipping it.\n", 122 fprintf(stderr, "problem processing %d event, skipping it.\n",
161 event->header.type); 123 event->header.type);
162 return -1; 124 return -1;
@@ -170,26 +132,17 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
170 return -1; 132 return -1;
171 } 133 }
172 134
173 attr = perf_header__find_attr(sample->id, &session->header);
174
175 if (add_event_total(session, sample, attr)) {
176 pr_debug("problem adding event period\n");
177 return -1;
178 }
179
180 return 0; 135 return 0;
181} 136}
182 137
183static int process_read_event(event_t *event, struct sample_data *sample __used, 138static int process_read_event(union perf_event *event,
184 struct perf_session *session __used) 139 struct perf_sample *sample __used,
140 struct perf_session *session)
185{ 141{
186 struct perf_event_attr *attr; 142 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist,
187 143 event->read.id);
188 attr = perf_header__find_attr(event->read.id, &session->header);
189
190 if (show_threads) { 144 if (show_threads) {
191 const char *name = attr ? __event_name(attr->type, attr->config) 145 const char *name = evsel ? event_name(evsel) : "unknown";
192 : "unknown";
193 perf_read_values_add_value(&show_threads_values, 146 perf_read_values_add_value(&show_threads_values,
194 event->read.pid, event->read.tid, 147 event->read.pid, event->read.tid,
195 event->read.id, 148 event->read.id,
@@ -198,7 +151,7 @@ static int process_read_event(event_t *event, struct sample_data *sample __used,
198 } 151 }
199 152
200 dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, 153 dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
201 attr ? __event_name(attr->type, attr->config) : "FAIL", 154 evsel ? event_name(evsel) : "FAIL",
202 event->read.value); 155 event->read.value);
203 156
204 return 0; 157 return 0;
@@ -222,7 +175,7 @@ static int perf_session__setup_sample_type(struct perf_session *self)
222 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE && 175 } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
223 !symbol_conf.use_callchain) { 176 !symbol_conf.use_callchain) {
224 symbol_conf.use_callchain = true; 177 symbol_conf.use_callchain = true;
225 if (register_callchain_param(&callchain_param) < 0) { 178 if (callchain_register_param(&callchain_param) < 0) {
226 fprintf(stderr, "Can't register callchain" 179 fprintf(stderr, "Can't register callchain"
227 " params\n"); 180 " params\n");
228 return -EINVAL; 181 return -EINVAL;
@@ -233,17 +186,17 @@ static int perf_session__setup_sample_type(struct perf_session *self)
233} 186}
234 187
235static struct perf_event_ops event_ops = { 188static struct perf_event_ops event_ops = {
236 .sample = process_sample_event, 189 .sample = process_sample_event,
237 .mmap = event__process_mmap, 190 .mmap = perf_event__process_mmap,
238 .comm = event__process_comm, 191 .comm = perf_event__process_comm,
239 .exit = event__process_task, 192 .exit = perf_event__process_task,
240 .fork = event__process_task, 193 .fork = perf_event__process_task,
241 .lost = event__process_lost, 194 .lost = perf_event__process_lost,
242 .read = process_read_event, 195 .read = process_read_event,
243 .attr = event__process_attr, 196 .attr = perf_event__process_attr,
244 .event_type = event__process_event_type, 197 .event_type = perf_event__process_event_type,
245 .tracing_data = event__process_tracing_data, 198 .tracing_data = perf_event__process_tracing_data,
246 .build_id = event__process_build_id, 199 .build_id = perf_event__process_build_id,
247 .ordered_samples = true, 200 .ordered_samples = true,
248 .ordering_requires_timestamps = true, 201 .ordering_requires_timestamps = true,
249}; 202};
@@ -269,21 +222,21 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
269 return ret + fprintf(fp, "\n#\n"); 222 return ret + fprintf(fp, "\n#\n");
270} 223}
271 224
272static int hists__tty_browse_tree(struct rb_root *tree, const char *help) 225static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
226 const char *help)
273{ 227{
274 struct rb_node *next = rb_first(tree); 228 struct perf_evsel *pos;
275 229
276 while (next) { 230 list_for_each_entry(pos, &evlist->entries, node) {
277 struct hists *hists = rb_entry(next, struct hists, rb_node); 231 struct hists *hists = &pos->hists;
278 const char *evname = NULL; 232 const char *evname = NULL;
279 233
280 if (rb_first(&hists->entries) != rb_last(&hists->entries)) 234 if (rb_first(&hists->entries) != rb_last(&hists->entries))
281 evname = __event_name(hists->type, hists->config); 235 evname = event_name(pos);
282 236
283 hists__fprintf_nr_sample_events(hists, evname, stdout); 237 hists__fprintf_nr_sample_events(hists, evname, stdout);
284 hists__fprintf(hists, NULL, false, stdout); 238 hists__fprintf(hists, NULL, false, stdout);
285 fprintf(stdout, "\n\n"); 239 fprintf(stdout, "\n\n");
286 next = rb_next(&hists->rb_node);
287 } 240 }
288 241
289 if (sort_order == default_sort_order && 242 if (sort_order == default_sort_order &&
@@ -304,8 +257,9 @@ static int hists__tty_browse_tree(struct rb_root *tree, const char *help)
304static int __cmd_report(void) 257static int __cmd_report(void)
305{ 258{
306 int ret = -EINVAL; 259 int ret = -EINVAL;
260 u64 nr_samples;
307 struct perf_session *session; 261 struct perf_session *session;
308 struct rb_node *next; 262 struct perf_evsel *pos;
309 const char *help = "For a higher level overview, try: perf report --sort comm,dso"; 263 const char *help = "For a higher level overview, try: perf report --sort comm,dso";
310 264
311 signal(SIGINT, sig_handler); 265 signal(SIGINT, sig_handler);
@@ -336,20 +290,24 @@ static int __cmd_report(void)
336 if (verbose > 2) 290 if (verbose > 2)
337 perf_session__fprintf_dsos(session, stdout); 291 perf_session__fprintf_dsos(session, stdout);
338 292
339 next = rb_first(&session->hists_tree); 293 nr_samples = 0;
340 while (next) { 294 list_for_each_entry(pos, &session->evlist->entries, node) {
341 struct hists *hists; 295 struct hists *hists = &pos->hists;
342 296
343 hists = rb_entry(next, struct hists, rb_node);
344 hists__collapse_resort(hists); 297 hists__collapse_resort(hists);
345 hists__output_resort(hists); 298 hists__output_resort(hists);
346 next = rb_next(&hists->rb_node); 299 nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
300 }
301
302 if (nr_samples == 0) {
303 ui__warning("The %s file has no samples!\n", input_name);
304 goto out_delete;
347 } 305 }
348 306
349 if (use_browser > 0) 307 if (use_browser > 0)
350 hists__tui_browse_tree(&session->hists_tree, help); 308 perf_evlist__tui_browse_hists(session->evlist, help);
351 else 309 else
352 hists__tty_browse_tree(&session->hists_tree, help); 310 perf_evlist__tty_browse_hists(session->evlist, help);
353 311
354out_delete: 312out_delete:
355 /* 313 /*
@@ -424,7 +382,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
424 if (tok2) 382 if (tok2)
425 callchain_param.print_limit = strtod(tok2, &endptr); 383 callchain_param.print_limit = strtod(tok2, &endptr);
426setup: 384setup:
427 if (register_callchain_param(&callchain_param) < 0) { 385 if (callchain_register_param(&callchain_param) < 0) {
428 fprintf(stderr, "Can't register callchain params\n"); 386 fprintf(stderr, "Can't register callchain params\n");
429 return -1; 387 return -1;
430 } 388 }
@@ -498,7 +456,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
498 use_browser = 1; 456 use_browser = 1;
499 457
500 if (strcmp(input_name, "-") != 0) 458 if (strcmp(input_name, "-") != 0)
501 setup_browser(); 459 setup_browser(true);
502 else 460 else
503 use_browser = 0; 461 use_browser = 0;
504 /* 462 /*
@@ -507,7 +465,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
507 * implementation. 465 * implementation.
508 */ 466 */
509 if (use_browser > 0) { 467 if (use_browser > 0) {
510 symbol_conf.priv_size = sizeof(struct sym_priv); 468 symbol_conf.priv_size = sizeof(struct annotation);
469 annotate_init = symbol__annotate_init;
511 /* 470 /*
512 * For searching by name on the "Browse map details". 471 * For searching by name on the "Browse map details".
513 * providing it only in verbose mode not to bloat too 472 * providing it only in verbose mode not to bloat too
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 29acb894e035..a32f411faeac 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -369,11 +369,6 @@ static void
369process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) 369process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
370{ 370{
371 int ret = 0; 371 int ret = 0;
372 u64 now;
373 long long delta;
374
375 now = get_nsecs();
376 delta = start_time + atom->timestamp - now;
377 372
378 switch (atom->type) { 373 switch (atom->type) {
379 case SCHED_EVENT_RUN: 374 case SCHED_EVENT_RUN:
@@ -562,7 +557,7 @@ static void wait_for_tasks(void)
562 557
563static void run_one_test(void) 558static void run_one_test(void)
564{ 559{
565 u64 T0, T1, delta, avg_delta, fluct, std_dev; 560 u64 T0, T1, delta, avg_delta, fluct;
566 561
567 T0 = get_nsecs(); 562 T0 = get_nsecs();
568 wait_for_tasks(); 563 wait_for_tasks();
@@ -578,7 +573,6 @@ static void run_one_test(void)
578 else 573 else
579 fluct = delta - avg_delta; 574 fluct = delta - avg_delta;
580 sum_fluct += fluct; 575 sum_fluct += fluct;
581 std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
582 if (!run_avg) 576 if (!run_avg)
583 run_avg = delta; 577 run_avg = delta;
584 run_avg = (run_avg*9 + delta)/10; 578 run_avg = (run_avg*9 + delta)/10;
@@ -799,7 +793,7 @@ replay_switch_event(struct trace_switch_event *switch_event,
799 u64 timestamp, 793 u64 timestamp,
800 struct thread *thread __used) 794 struct thread *thread __used)
801{ 795{
802 struct task_desc *prev, *next; 796 struct task_desc *prev, __used *next;
803 u64 timestamp0; 797 u64 timestamp0;
804 s64 delta; 798 s64 delta;
805 799
@@ -1404,7 +1398,7 @@ map_switch_event(struct trace_switch_event *switch_event,
1404 u64 timestamp, 1398 u64 timestamp,
1405 struct thread *thread __used) 1399 struct thread *thread __used)
1406{ 1400{
1407 struct thread *sched_out, *sched_in; 1401 struct thread *sched_out __used, *sched_in;
1408 int new_shortname; 1402 int new_shortname;
1409 u64 timestamp0; 1403 u64 timestamp0;
1410 s64 delta; 1404 s64 delta;
@@ -1580,9 +1574,9 @@ process_sched_migrate_task_event(void *data, struct perf_session *session,
1580 event, cpu, timestamp, thread); 1574 event, cpu, timestamp, thread);
1581} 1575}
1582 1576
1583static void 1577static void process_raw_event(union perf_event *raw_event __used,
1584process_raw_event(event_t *raw_event __used, struct perf_session *session, 1578 struct perf_session *session, void *data, int cpu,
1585 void *data, int cpu, u64 timestamp, struct thread *thread) 1579 u64 timestamp, struct thread *thread)
1586{ 1580{
1587 struct event *event; 1581 struct event *event;
1588 int type; 1582 int type;
@@ -1607,7 +1601,8 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
1607 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread); 1601 process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
1608} 1602}
1609 1603
1610static int process_sample_event(event_t *event, struct sample_data *sample, 1604static int process_sample_event(union perf_event *event,
1605 struct perf_sample *sample,
1611 struct perf_session *session) 1606 struct perf_session *session)
1612{ 1607{
1613 struct thread *thread; 1608 struct thread *thread;
@@ -1635,9 +1630,9 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
1635 1630
1636static struct perf_event_ops event_ops = { 1631static struct perf_event_ops event_ops = {
1637 .sample = process_sample_event, 1632 .sample = process_sample_event,
1638 .comm = event__process_comm, 1633 .comm = perf_event__process_comm,
1639 .lost = event__process_lost, 1634 .lost = perf_event__process_lost,
1640 .fork = event__process_task, 1635 .fork = perf_event__process_task,
1641 .ordered_samples = true, 1636 .ordered_samples = true,
1642}; 1637};
1643 1638
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index b766c2a9ac97..5f40df635dcb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,7 +63,8 @@ static int cleanup_scripting(void)
63 63
64static char const *input_name = "perf.data"; 64static char const *input_name = "perf.data";
65 65
66static int process_sample_event(event_t *event, struct sample_data *sample, 66static int process_sample_event(union perf_event *event,
67 struct perf_sample *sample,
67 struct perf_session *session) 68 struct perf_session *session)
68{ 69{
69 struct thread *thread = perf_session__findnew(session, event->ip.pid); 70 struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -100,14 +101,14 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
100} 101}
101 102
102static struct perf_event_ops event_ops = { 103static struct perf_event_ops event_ops = {
103 .sample = process_sample_event, 104 .sample = process_sample_event,
104 .comm = event__process_comm, 105 .comm = perf_event__process_comm,
105 .attr = event__process_attr, 106 .attr = perf_event__process_attr,
106 .event_type = event__process_event_type, 107 .event_type = perf_event__process_event_type,
107 .tracing_data = event__process_tracing_data, 108 .tracing_data = perf_event__process_tracing_data,
108 .build_id = event__process_build_id, 109 .build_id = perf_event__process_build_id,
109 .ordering_requires_timestamps = true,
110 .ordered_samples = true, 110 .ordered_samples = true,
111 .ordering_requires_timestamps = true,
111}; 112};
112 113
113extern volatile int session_done; 114extern volatile int session_done;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a482a191a0ca..21c025222496 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,11 +43,13 @@
43#include "util/parse-options.h" 43#include "util/parse-options.h"
44#include "util/parse-events.h" 44#include "util/parse-events.h"
45#include "util/event.h" 45#include "util/event.h"
46#include "util/evlist.h"
46#include "util/evsel.h" 47#include "util/evsel.h"
47#include "util/debug.h" 48#include "util/debug.h"
48#include "util/header.h" 49#include "util/header.h"
49#include "util/cpumap.h" 50#include "util/cpumap.h"
50#include "util/thread.h" 51#include "util/thread.h"
52#include "util/thread_map.h"
51 53
52#include <sys/prctl.h> 54#include <sys/prctl.h>
53#include <math.h> 55#include <math.h>
@@ -71,8 +73,9 @@ static struct perf_event_attr default_attrs[] = {
71 73
72}; 74};
73 75
76struct perf_evlist *evsel_list;
77
74static bool system_wide = false; 78static bool system_wide = false;
75static struct cpu_map *cpus;
76static int run_idx = 0; 79static int run_idx = 0;
77 80
78static int run_count = 1; 81static int run_count = 1;
@@ -81,7 +84,6 @@ static bool scale = true;
81static bool no_aggr = false; 84static bool no_aggr = false;
82static pid_t target_pid = -1; 85static pid_t target_pid = -1;
83static pid_t target_tid = -1; 86static pid_t target_tid = -1;
84static struct thread_map *threads;
85static pid_t child_pid = -1; 87static pid_t child_pid = -1;
86static bool null_run = false; 88static bool null_run = false;
87static bool big_num = true; 89static bool big_num = true;
@@ -166,7 +168,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
166 PERF_FORMAT_TOTAL_TIME_RUNNING; 168 PERF_FORMAT_TOTAL_TIME_RUNNING;
167 169
168 if (system_wide) 170 if (system_wide)
169 return perf_evsel__open_per_cpu(evsel, cpus); 171 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false, false);
170 172
171 attr->inherit = !no_inherit; 173 attr->inherit = !no_inherit;
172 if (target_pid == -1 && target_tid == -1) { 174 if (target_pid == -1 && target_tid == -1) {
@@ -174,7 +176,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
174 attr->enable_on_exec = 1; 176 attr->enable_on_exec = 1;
175 } 177 }
176 178
177 return perf_evsel__open_per_thread(evsel, threads); 179 return perf_evsel__open_per_thread(evsel, evsel_list->threads, false, false);
178} 180}
179 181
180/* 182/*
@@ -199,7 +201,8 @@ static int read_counter_aggr(struct perf_evsel *counter)
199 u64 *count = counter->counts->aggr.values; 201 u64 *count = counter->counts->aggr.values;
200 int i; 202 int i;
201 203
202 if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0) 204 if (__perf_evsel__read(counter, evsel_list->cpus->nr,
205 evsel_list->threads->nr, scale) < 0)
203 return -1; 206 return -1;
204 207
205 for (i = 0; i < 3; i++) 208 for (i = 0; i < 3; i++)
@@ -232,7 +235,7 @@ static int read_counter(struct perf_evsel *counter)
232 u64 *count; 235 u64 *count;
233 int cpu; 236 int cpu;
234 237
235 for (cpu = 0; cpu < cpus->nr; cpu++) { 238 for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
236 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) 239 if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
237 return -1; 240 return -1;
238 241
@@ -297,7 +300,7 @@ static int run_perf_stat(int argc __used, const char **argv)
297 } 300 }
298 301
299 if (target_tid == -1 && target_pid == -1 && !system_wide) 302 if (target_tid == -1 && target_pid == -1 && !system_wide)
300 threads->map[0] = child_pid; 303 evsel_list->threads->map[0] = child_pid;
301 304
302 /* 305 /*
303 * Wait for the child to be ready to exec. 306 * Wait for the child to be ready to exec.
@@ -309,7 +312,7 @@ static int run_perf_stat(int argc __used, const char **argv)
309 close(child_ready_pipe[0]); 312 close(child_ready_pipe[0]);
310 } 313 }
311 314
312 list_for_each_entry(counter, &evsel_list, node) { 315 list_for_each_entry(counter, &evsel_list->entries, node) {
313 if (create_perf_stat_counter(counter) < 0) { 316 if (create_perf_stat_counter(counter) < 0) {
314 if (errno == -EPERM || errno == -EACCES) { 317 if (errno == -EPERM || errno == -EACCES) {
315 error("You may not have permission to collect %sstats.\n" 318 error("You may not have permission to collect %sstats.\n"
@@ -347,14 +350,15 @@ static int run_perf_stat(int argc __used, const char **argv)
347 update_stats(&walltime_nsecs_stats, t1 - t0); 350 update_stats(&walltime_nsecs_stats, t1 - t0);
348 351
349 if (no_aggr) { 352 if (no_aggr) {
350 list_for_each_entry(counter, &evsel_list, node) { 353 list_for_each_entry(counter, &evsel_list->entries, node) {
351 read_counter(counter); 354 read_counter(counter);
352 perf_evsel__close_fd(counter, cpus->nr, 1); 355 perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
353 } 356 }
354 } else { 357 } else {
355 list_for_each_entry(counter, &evsel_list, node) { 358 list_for_each_entry(counter, &evsel_list->entries, node) {
356 read_counter_aggr(counter); 359 read_counter_aggr(counter);
357 perf_evsel__close_fd(counter, cpus->nr, threads->nr); 360 perf_evsel__close_fd(counter, evsel_list->cpus->nr,
361 evsel_list->threads->nr);
358 } 362 }
359 } 363 }
360 364
@@ -382,10 +386,13 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
382 if (no_aggr) 386 if (no_aggr)
383 sprintf(cpustr, "CPU%*d%s", 387 sprintf(cpustr, "CPU%*d%s",
384 csv_output ? 0 : -4, 388 csv_output ? 0 : -4,
385 cpus->map[cpu], csv_sep); 389 evsel_list->cpus->map[cpu], csv_sep);
386 390
387 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel)); 391 fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
388 392
393 if (evsel->cgrp)
394 fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
395
389 if (csv_output) 396 if (csv_output)
390 return; 397 return;
391 398
@@ -410,12 +417,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
410 if (no_aggr) 417 if (no_aggr)
411 sprintf(cpustr, "CPU%*d%s", 418 sprintf(cpustr, "CPU%*d%s",
412 csv_output ? 0 : -4, 419 csv_output ? 0 : -4,
413 cpus->map[cpu], csv_sep); 420 evsel_list->cpus->map[cpu], csv_sep);
414 else 421 else
415 cpu = 0; 422 cpu = 0;
416 423
417 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel)); 424 fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
418 425
426 if (evsel->cgrp)
427 fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
428
419 if (csv_output) 429 if (csv_output)
420 return; 430 return;
421 431
@@ -456,9 +466,17 @@ static void print_counter_aggr(struct perf_evsel *counter)
456 int scaled = counter->counts->scaled; 466 int scaled = counter->counts->scaled;
457 467
458 if (scaled == -1) { 468 if (scaled == -1) {
459 fprintf(stderr, "%*s%s%-24s\n", 469 fprintf(stderr, "%*s%s%*s",
460 csv_output ? 0 : 18, 470 csv_output ? 0 : 18,
461 "<not counted>", csv_sep, event_name(counter)); 471 "<not counted>",
472 csv_sep,
473 csv_output ? 0 : -24,
474 event_name(counter));
475
476 if (counter->cgrp)
477 fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
478
479 fputc('\n', stderr);
462 return; 480 return;
463 } 481 }
464 482
@@ -483,7 +501,6 @@ static void print_counter_aggr(struct perf_evsel *counter)
483 fprintf(stderr, " (scaled from %.2f%%)", 501 fprintf(stderr, " (scaled from %.2f%%)",
484 100 * avg_running / avg_enabled); 502 100 * avg_running / avg_enabled);
485 } 503 }
486
487 fprintf(stderr, "\n"); 504 fprintf(stderr, "\n");
488} 505}
489 506
@@ -496,19 +513,23 @@ static void print_counter(struct perf_evsel *counter)
496 u64 ena, run, val; 513 u64 ena, run, val;
497 int cpu; 514 int cpu;
498 515
499 for (cpu = 0; cpu < cpus->nr; cpu++) { 516 for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
500 val = counter->counts->cpu[cpu].val; 517 val = counter->counts->cpu[cpu].val;
501 ena = counter->counts->cpu[cpu].ena; 518 ena = counter->counts->cpu[cpu].ena;
502 run = counter->counts->cpu[cpu].run; 519 run = counter->counts->cpu[cpu].run;
503 if (run == 0 || ena == 0) { 520 if (run == 0 || ena == 0) {
504 fprintf(stderr, "CPU%*d%s%*s%s%-24s", 521 fprintf(stderr, "CPU%*d%s%*s%s%*s",
505 csv_output ? 0 : -4, 522 csv_output ? 0 : -4,
506 cpus->map[cpu], csv_sep, 523 evsel_list->cpus->map[cpu], csv_sep,
507 csv_output ? 0 : 18, 524 csv_output ? 0 : 18,
508 "<not counted>", csv_sep, 525 "<not counted>", csv_sep,
526 csv_output ? 0 : -24,
509 event_name(counter)); 527 event_name(counter));
510 528
511 fprintf(stderr, "\n"); 529 if (counter->cgrp)
530 fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
531
532 fputc('\n', stderr);
512 continue; 533 continue;
513 } 534 }
514 535
@@ -525,7 +546,7 @@ static void print_counter(struct perf_evsel *counter)
525 100.0 * run / ena); 546 100.0 * run / ena);
526 } 547 }
527 } 548 }
528 fprintf(stderr, "\n"); 549 fputc('\n', stderr);
529 } 550 }
530} 551}
531 552
@@ -555,10 +576,10 @@ static void print_stat(int argc, const char **argv)
555 } 576 }
556 577
557 if (no_aggr) { 578 if (no_aggr) {
558 list_for_each_entry(counter, &evsel_list, node) 579 list_for_each_entry(counter, &evsel_list->entries, node)
559 print_counter(counter); 580 print_counter(counter);
560 } else { 581 } else {
561 list_for_each_entry(counter, &evsel_list, node) 582 list_for_each_entry(counter, &evsel_list->entries, node)
562 print_counter_aggr(counter); 583 print_counter_aggr(counter);
563 } 584 }
564 585
@@ -610,7 +631,7 @@ static int stat__set_big_num(const struct option *opt __used,
610} 631}
611 632
612static const struct option options[] = { 633static const struct option options[] = {
613 OPT_CALLBACK('e', "event", NULL, "event", 634 OPT_CALLBACK('e', "event", &evsel_list, "event",
614 "event selector. use 'perf list' to list available events", 635 "event selector. use 'perf list' to list available events",
615 parse_events), 636 parse_events),
616 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 637 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
@@ -638,6 +659,9 @@ static const struct option options[] = {
638 "disable CPU count aggregation"), 659 "disable CPU count aggregation"),
639 OPT_STRING('x', "field-separator", &csv_sep, "separator", 660 OPT_STRING('x', "field-separator", &csv_sep, "separator",
640 "print counts with custom separator"), 661 "print counts with custom separator"),
662 OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
663 "monitor event in cgroup name only",
664 parse_cgroups),
641 OPT_END() 665 OPT_END()
642}; 666};
643 667
@@ -648,6 +672,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
648 672
649 setlocale(LC_ALL, ""); 673 setlocale(LC_ALL, "");
650 674
675 evsel_list = perf_evlist__new(NULL, NULL);
676 if (evsel_list == NULL)
677 return -ENOMEM;
678
651 argc = parse_options(argc, argv, options, stat_usage, 679 argc = parse_options(argc, argv, options, stat_usage,
652 PARSE_OPT_STOP_AT_NON_OPTION); 680 PARSE_OPT_STOP_AT_NON_OPTION);
653 681
@@ -674,49 +702,50 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
674 if (run_count <= 0) 702 if (run_count <= 0)
675 usage_with_options(stat_usage, options); 703 usage_with_options(stat_usage, options);
676 704
677 /* no_aggr is for system-wide only */ 705 /* no_aggr, cgroup are for system-wide only */
678 if (no_aggr && !system_wide) 706 if ((no_aggr || nr_cgroups) && !system_wide) {
707 fprintf(stderr, "both cgroup and no-aggregation "
708 "modes only available in system-wide mode\n");
709
679 usage_with_options(stat_usage, options); 710 usage_with_options(stat_usage, options);
711 }
680 712
681 /* Set attrs and nr_counters if no event is selected and !null_run */ 713 /* Set attrs and nr_counters if no event is selected and !null_run */
682 if (!null_run && !nr_counters) { 714 if (!null_run && !evsel_list->nr_entries) {
683 size_t c; 715 size_t c;
684 716
685 nr_counters = ARRAY_SIZE(default_attrs);
686
687 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { 717 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
688 pos = perf_evsel__new(&default_attrs[c], 718 pos = perf_evsel__new(&default_attrs[c], c);
689 nr_counters);
690 if (pos == NULL) 719 if (pos == NULL)
691 goto out; 720 goto out;
692 list_add(&pos->node, &evsel_list); 721 perf_evlist__add(evsel_list, pos);
693 } 722 }
694 } 723 }
695 724
696 if (target_pid != -1) 725 if (target_pid != -1)
697 target_tid = target_pid; 726 target_tid = target_pid;
698 727
699 threads = thread_map__new(target_pid, target_tid); 728 evsel_list->threads = thread_map__new(target_pid, target_tid);
700 if (threads == NULL) { 729 if (evsel_list->threads == NULL) {
701 pr_err("Problems finding threads of monitor\n"); 730 pr_err("Problems finding threads of monitor\n");
702 usage_with_options(stat_usage, options); 731 usage_with_options(stat_usage, options);
703 } 732 }
704 733
705 if (system_wide) 734 if (system_wide)
706 cpus = cpu_map__new(cpu_list); 735 evsel_list->cpus = cpu_map__new(cpu_list);
707 else 736 else
708 cpus = cpu_map__dummy_new(); 737 evsel_list->cpus = cpu_map__dummy_new();
709 738
710 if (cpus == NULL) { 739 if (evsel_list->cpus == NULL) {
711 perror("failed to parse CPUs map"); 740 perror("failed to parse CPUs map");
712 usage_with_options(stat_usage, options); 741 usage_with_options(stat_usage, options);
713 return -1; 742 return -1;
714 } 743 }
715 744
716 list_for_each_entry(pos, &evsel_list, node) { 745 list_for_each_entry(pos, &evsel_list->entries, node) {
717 if (perf_evsel__alloc_stat_priv(pos) < 0 || 746 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
718 perf_evsel__alloc_counts(pos, cpus->nr) < 0 || 747 perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 ||
719 perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0) 748 perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
720 goto out_free_fd; 749 goto out_free_fd;
721 } 750 }
722 751
@@ -741,11 +770,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
741 if (status != -1) 770 if (status != -1)
742 print_stat(argc, argv); 771 print_stat(argc, argv);
743out_free_fd: 772out_free_fd:
744 list_for_each_entry(pos, &evsel_list, node) 773 list_for_each_entry(pos, &evsel_list->entries, node)
745 perf_evsel__free_stat_priv(pos); 774 perf_evsel__free_stat_priv(pos);
746 perf_evsel_list__delete(); 775 perf_evlist__delete_maps(evsel_list);
747out: 776out:
748 thread_map__delete(threads); 777 perf_evlist__delete(evsel_list);
749 threads = NULL;
750 return status; 778 return status;
751} 779}
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 5dcdba653d70..1b2106c58f66 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -7,10 +7,11 @@
7 7
8#include "util/cache.h" 8#include "util/cache.h"
9#include "util/debug.h" 9#include "util/debug.h"
10#include "util/evlist.h"
10#include "util/parse-options.h" 11#include "util/parse-options.h"
11#include "util/session.h" 12#include "util/parse-events.h"
12#include "util/symbol.h" 13#include "util/symbol.h"
13#include "util/thread.h" 14#include "util/thread_map.h"
14 15
15static long page_size; 16static long page_size;
16 17
@@ -238,14 +239,14 @@ out:
238#include "util/evsel.h" 239#include "util/evsel.h"
239#include <sys/types.h> 240#include <sys/types.h>
240 241
241static int trace_event__id(const char *event_name) 242static int trace_event__id(const char *evname)
242{ 243{
243 char *filename; 244 char *filename;
244 int err = -1, fd; 245 int err = -1, fd;
245 246
246 if (asprintf(&filename, 247 if (asprintf(&filename,
247 "/sys/kernel/debug/tracing/events/syscalls/%s/id", 248 "/sys/kernel/debug/tracing/events/syscalls/%s/id",
248 event_name) < 0) 249 evname) < 0)
249 return -1; 250 return -1;
250 251
251 fd = open(filename, O_RDONLY); 252 fd = open(filename, O_RDONLY);
@@ -289,7 +290,7 @@ static int test__open_syscall_event(void)
289 goto out_thread_map_delete; 290 goto out_thread_map_delete;
290 } 291 }
291 292
292 if (perf_evsel__open_per_thread(evsel, threads) < 0) { 293 if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) {
293 pr_debug("failed to open counter: %s, " 294 pr_debug("failed to open counter: %s, "
294 "tweak /proc/sys/kernel/perf_event_paranoid?\n", 295 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
295 strerror(errno)); 296 strerror(errno));
@@ -347,9 +348,9 @@ static int test__open_syscall_event_on_all_cpus(void)
347 } 348 }
348 349
349 cpus = cpu_map__new(NULL); 350 cpus = cpu_map__new(NULL);
350 if (threads == NULL) { 351 if (cpus == NULL) {
351 pr_debug("thread_map__new\n"); 352 pr_debug("cpu_map__new\n");
352 return -1; 353 goto out_thread_map_delete;
353 } 354 }
354 355
355 356
@@ -364,7 +365,7 @@ static int test__open_syscall_event_on_all_cpus(void)
364 goto out_thread_map_delete; 365 goto out_thread_map_delete;
365 } 366 }
366 367
367 if (perf_evsel__open(evsel, cpus, threads) < 0) { 368 if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) {
368 pr_debug("failed to open counter: %s, " 369 pr_debug("failed to open counter: %s, "
369 "tweak /proc/sys/kernel/perf_event_paranoid?\n", 370 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
370 strerror(errno)); 371 strerror(errno));
@@ -408,6 +409,8 @@ static int test__open_syscall_event_on_all_cpus(void)
408 goto out_close_fd; 409 goto out_close_fd;
409 } 410 }
410 411
412 err = 0;
413
411 for (cpu = 0; cpu < cpus->nr; ++cpu) { 414 for (cpu = 0; cpu < cpus->nr; ++cpu) {
412 unsigned int expected; 415 unsigned int expected;
413 416
@@ -416,18 +419,18 @@ static int test__open_syscall_event_on_all_cpus(void)
416 419
417 if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { 420 if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
418 pr_debug("perf_evsel__open_read_on_cpu\n"); 421 pr_debug("perf_evsel__open_read_on_cpu\n");
419 goto out_close_fd; 422 err = -1;
423 break;
420 } 424 }
421 425
422 expected = nr_open_calls + cpu; 426 expected = nr_open_calls + cpu;
423 if (evsel->counts->cpu[cpu].val != expected) { 427 if (evsel->counts->cpu[cpu].val != expected) {
424 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", 428 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
425 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); 429 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
426 goto out_close_fd; 430 err = -1;
427 } 431 }
428 } 432 }
429 433
430 err = 0;
431out_close_fd: 434out_close_fd:
432 perf_evsel__close_fd(evsel, 1, threads->nr); 435 perf_evsel__close_fd(evsel, 1, threads->nr);
433out_evsel_delete: 436out_evsel_delete:
@@ -437,6 +440,159 @@ out_thread_map_delete:
437 return err; 440 return err;
438} 441}
439 442
443/*
444 * This test will generate random numbers of calls to some getpid syscalls,
445 * then establish an mmap for a group of events that are created to monitor
446 * the syscalls.
447 *
448 * It will receive the events, using mmap, use its PERF_SAMPLE_ID generated
449 * sample.id field to map back to its respective perf_evsel instance.
450 *
451 * Then it checks if the number of syscalls reported as perf events by
452 * the kernel corresponds to the number of syscalls made.
453 */
454static int test__basic_mmap(void)
455{
456 int err = -1;
457 union perf_event *event;
458 struct thread_map *threads;
459 struct cpu_map *cpus;
460 struct perf_evlist *evlist;
461 struct perf_event_attr attr = {
462 .type = PERF_TYPE_TRACEPOINT,
463 .read_format = PERF_FORMAT_ID,
464 .sample_type = PERF_SAMPLE_ID,
465 .watermark = 0,
466 };
467 cpu_set_t cpu_set;
468 const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
469 "getpgid", };
470 pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
471 (void*)getpgid };
472#define nsyscalls ARRAY_SIZE(syscall_names)
473 int ids[nsyscalls];
474 unsigned int nr_events[nsyscalls],
475 expected_nr_events[nsyscalls], i, j;
476 struct perf_evsel *evsels[nsyscalls], *evsel;
477
478 for (i = 0; i < nsyscalls; ++i) {
479 char name[64];
480
481 snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
482 ids[i] = trace_event__id(name);
483 if (ids[i] < 0) {
484 pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
485 return -1;
486 }
487 nr_events[i] = 0;
488 expected_nr_events[i] = random() % 257;
489 }
490
491 threads = thread_map__new(-1, getpid());
492 if (threads == NULL) {
493 pr_debug("thread_map__new\n");
494 return -1;
495 }
496
497 cpus = cpu_map__new(NULL);
498 if (cpus == NULL) {
499 pr_debug("cpu_map__new\n");
500 goto out_free_threads;
501 }
502
503 CPU_ZERO(&cpu_set);
504 CPU_SET(cpus->map[0], &cpu_set);
505 sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
506 if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
507 pr_debug("sched_setaffinity() failed on CPU %d: %s ",
508 cpus->map[0], strerror(errno));
509 goto out_free_cpus;
510 }
511
512 evlist = perf_evlist__new(cpus, threads);
513 if (evlist == NULL) {
514 pr_debug("perf_evlist__new\n");
515 goto out_free_cpus;
516 }
517
518 /* anonymous union fields, can't be initialized above */
519 attr.wakeup_events = 1;
520 attr.sample_period = 1;
521
522 for (i = 0; i < nsyscalls; ++i) {
523 attr.config = ids[i];
524 evsels[i] = perf_evsel__new(&attr, i);
525 if (evsels[i] == NULL) {
526 pr_debug("perf_evsel__new\n");
527 goto out_free_evlist;
528 }
529
530 perf_evlist__add(evlist, evsels[i]);
531
532 if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) {
533 pr_debug("failed to open counter: %s, "
534 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
535 strerror(errno));
536 goto out_close_fd;
537 }
538 }
539
540 if (perf_evlist__mmap(evlist, 128, true) < 0) {
541 pr_debug("failed to mmap events: %d (%s)\n", errno,
542 strerror(errno));
543 goto out_close_fd;
544 }
545
546 for (i = 0; i < nsyscalls; ++i)
547 for (j = 0; j < expected_nr_events[i]; ++j) {
548 int foo = syscalls[i]();
549 ++foo;
550 }
551
552 while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) {
553 struct perf_sample sample;
554
555 if (event->header.type != PERF_RECORD_SAMPLE) {
556 pr_debug("unexpected %s event\n",
557 perf_event__name(event->header.type));
558 goto out_munmap;
559 }
560
561 perf_event__parse_sample(event, attr.sample_type, false, &sample);
562 evsel = perf_evlist__id2evsel(evlist, sample.id);
563 if (evsel == NULL) {
564 pr_debug("event with id %" PRIu64
565 " doesn't map to an evsel\n", sample.id);
566 goto out_munmap;
567 }
568 nr_events[evsel->idx]++;
569 }
570
571 list_for_each_entry(evsel, &evlist->entries, node) {
572 if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
573 pr_debug("expected %d %s events, got %d\n",
574 expected_nr_events[evsel->idx],
575 event_name(evsel), nr_events[evsel->idx]);
576 goto out_munmap;
577 }
578 }
579
580 err = 0;
581out_munmap:
582 perf_evlist__munmap(evlist);
583out_close_fd:
584 for (i = 0; i < nsyscalls; ++i)
585 perf_evsel__close_fd(evsels[i], 1, threads->nr);
586out_free_evlist:
587 perf_evlist__delete(evlist);
588out_free_cpus:
589 cpu_map__delete(cpus);
590out_free_threads:
591 thread_map__delete(threads);
592 return err;
593#undef nsyscalls
594}
595
440static struct test { 596static struct test {
441 const char *desc; 597 const char *desc;
442 int (*func)(void); 598 int (*func)(void);
@@ -454,6 +610,10 @@ static struct test {
454 .func = test__open_syscall_event_on_all_cpus, 610 .func = test__open_syscall_event_on_all_cpus,
455 }, 611 },
456 { 612 {
613 .desc = "read samples using the mmap interface",
614 .func = test__basic_mmap,
615 },
616 {
457 .func = NULL, 617 .func = NULL,
458 }, 618 },
459}; 619};
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 746cf03cb05d..67c0459dc325 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -264,9 +264,6 @@ pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end)
264 c->start_time = start; 264 c->start_time = start;
265 if (p->start_time == 0 || p->start_time > start) 265 if (p->start_time == 0 || p->start_time > start)
266 p->start_time = start; 266 p->start_time = start;
267
268 if (cpu > numcpus)
269 numcpus = cpu;
270} 267}
271 268
272#define MAX_CPUS 4096 269#define MAX_CPUS 4096
@@ -276,21 +273,24 @@ static int cpus_cstate_state[MAX_CPUS];
276static u64 cpus_pstate_start_times[MAX_CPUS]; 273static u64 cpus_pstate_start_times[MAX_CPUS];
277static u64 cpus_pstate_state[MAX_CPUS]; 274static u64 cpus_pstate_state[MAX_CPUS];
278 275
279static int process_comm_event(event_t *event, struct sample_data *sample __used, 276static int process_comm_event(union perf_event *event,
277 struct perf_sample *sample __used,
280 struct perf_session *session __used) 278 struct perf_session *session __used)
281{ 279{
282 pid_set_comm(event->comm.tid, event->comm.comm); 280 pid_set_comm(event->comm.tid, event->comm.comm);
283 return 0; 281 return 0;
284} 282}
285 283
286static int process_fork_event(event_t *event, struct sample_data *sample __used, 284static int process_fork_event(union perf_event *event,
285 struct perf_sample *sample __used,
287 struct perf_session *session __used) 286 struct perf_session *session __used)
288{ 287{
289 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time); 288 pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
290 return 0; 289 return 0;
291} 290}
292 291
293static int process_exit_event(event_t *event, struct sample_data *sample __used, 292static int process_exit_event(union perf_event *event,
293 struct perf_sample *sample __used,
294 struct perf_session *session __used) 294 struct perf_session *session __used)
295{ 295{
296 pid_exit(event->fork.pid, event->fork.time); 296 pid_exit(event->fork.pid, event->fork.time);
@@ -486,8 +486,8 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
486} 486}
487 487
488 488
489static int process_sample_event(event_t *event __used, 489static int process_sample_event(union perf_event *event __used,
490 struct sample_data *sample, 490 struct perf_sample *sample,
491 struct perf_session *session) 491 struct perf_session *session)
492{ 492{
493 struct trace_entry *te; 493 struct trace_entry *te;
@@ -511,6 +511,9 @@ static int process_sample_event(event_t *event __used,
511 if (!event_str) 511 if (!event_str)
512 return 0; 512 return 0;
513 513
514 if (sample->cpu > numcpus)
515 numcpus = sample->cpu;
516
514 if (strcmp(event_str, "power:cpu_idle") == 0) { 517 if (strcmp(event_str, "power:cpu_idle") == 0) {
515 struct power_processor_entry *ppe = (void *)te; 518 struct power_processor_entry *ppe = (void *)te;
516 if (ppe->state == (u32)PWR_EVENT_EXIT) 519 if (ppe->state == (u32)PWR_EVENT_EXIT)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5a29d9cd9486..80c9e062bd5b 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -20,11 +20,16 @@
20 20
21#include "perf.h" 21#include "perf.h"
22 22
23#include "util/annotate.h"
24#include "util/cache.h"
23#include "util/color.h" 25#include "util/color.h"
26#include "util/evlist.h"
24#include "util/evsel.h" 27#include "util/evsel.h"
25#include "util/session.h" 28#include "util/session.h"
26#include "util/symbol.h" 29#include "util/symbol.h"
27#include "util/thread.h" 30#include "util/thread.h"
31#include "util/thread_map.h"
32#include "util/top.h"
28#include "util/util.h" 33#include "util/util.h"
29#include <linux/rbtree.h> 34#include <linux/rbtree.h>
30#include "util/parse-options.h" 35#include "util/parse-options.h"
@@ -45,7 +50,6 @@
45#include <errno.h> 50#include <errno.h>
46#include <time.h> 51#include <time.h>
47#include <sched.h> 52#include <sched.h>
48#include <pthread.h>
49 53
50#include <sys/syscall.h> 54#include <sys/syscall.h>
51#include <sys/ioctl.h> 55#include <sys/ioctl.h>
@@ -60,85 +64,42 @@
60 64
61#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 65#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
62 66
67static struct perf_top top = {
68 .count_filter = 5,
69 .delay_secs = 2,
70 .display_weighted = -1,
71 .target_pid = -1,
72 .target_tid = -1,
73 .active_symbols = LIST_HEAD_INIT(top.active_symbols),
74 .active_symbols_lock = PTHREAD_MUTEX_INITIALIZER,
75 .active_symbols_cond = PTHREAD_COND_INITIALIZER,
76 .freq = 1000, /* 1 KHz */
77};
78
63static bool system_wide = false; 79static bool system_wide = false;
64 80
65static int default_interval = 0; 81static bool use_tui, use_stdio;
66 82
67static int count_filter = 5; 83static int default_interval = 0;
68static int print_entries;
69 84
70static int target_pid = -1;
71static int target_tid = -1;
72static struct thread_map *threads;
73static bool inherit = false; 85static bool inherit = false;
74static struct cpu_map *cpus;
75static int realtime_prio = 0; 86static int realtime_prio = 0;
76static bool group = false; 87static bool group = false;
77static unsigned int page_size; 88static unsigned int page_size;
78static unsigned int mmap_pages = 16; 89static unsigned int mmap_pages = 128;
79static int freq = 1000; /* 1 KHz */
80 90
81static int delay_secs = 2;
82static bool zero = false;
83static bool dump_symtab = false; 91static bool dump_symtab = false;
84 92
85static bool hide_kernel_symbols = false;
86static bool hide_user_symbols = false;
87static struct winsize winsize; 93static struct winsize winsize;
88 94
89/*
90 * Source
91 */
92
93struct source_line {
94 u64 eip;
95 unsigned long count[MAX_COUNTERS];
96 char *line;
97 struct source_line *next;
98};
99
100static const char *sym_filter = NULL; 95static const char *sym_filter = NULL;
101struct sym_entry *sym_filter_entry = NULL;
102struct sym_entry *sym_filter_entry_sched = NULL; 96struct sym_entry *sym_filter_entry_sched = NULL;
103static int sym_pcnt_filter = 5; 97static int sym_pcnt_filter = 5;
104static int sym_counter = 0;
105static struct perf_evsel *sym_evsel = NULL;
106static int display_weighted = -1;
107static const char *cpu_list;
108
109/*
110 * Symbols
111 */
112
113struct sym_entry_source {
114 struct source_line *source;
115 struct source_line *lines;
116 struct source_line **lines_tail;
117 pthread_mutex_t lock;
118};
119
120struct sym_entry {
121 struct rb_node rb_node;
122 struct list_head node;
123 unsigned long snap_count;
124 double weight;
125 int skip;
126 u16 name_len;
127 u8 origin;
128 struct map *map;
129 struct sym_entry_source *src;
130 unsigned long count[0];
131};
132 98
133/* 99/*
134 * Source functions 100 * Source functions
135 */ 101 */
136 102
137static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
138{
139 return ((void *)self) + symbol_conf.priv_size;
140}
141
142void get_term_dimensions(struct winsize *ws) 103void get_term_dimensions(struct winsize *ws)
143{ 104{
144 char *s = getenv("LINES"); 105 char *s = getenv("LINES");
@@ -163,10 +124,10 @@ void get_term_dimensions(struct winsize *ws)
163 124
164static void update_print_entries(struct winsize *ws) 125static void update_print_entries(struct winsize *ws)
165{ 126{
166 print_entries = ws->ws_row; 127 top.print_entries = ws->ws_row;
167 128
168 if (print_entries > 9) 129 if (top.print_entries > 9)
169 print_entries -= 9; 130 top.print_entries -= 9;
170} 131}
171 132
172static void sig_winch_handler(int sig __used) 133static void sig_winch_handler(int sig __used)
@@ -178,12 +139,9 @@ static void sig_winch_handler(int sig __used)
178static int parse_source(struct sym_entry *syme) 139static int parse_source(struct sym_entry *syme)
179{ 140{
180 struct symbol *sym; 141 struct symbol *sym;
181 struct sym_entry_source *source; 142 struct annotation *notes;
182 struct map *map; 143 struct map *map;
183 FILE *file; 144 int err = -1;
184 char command[PATH_MAX*2];
185 const char *path;
186 u64 len;
187 145
188 if (!syme) 146 if (!syme)
189 return -1; 147 return -1;
@@ -194,411 +152,137 @@ static int parse_source(struct sym_entry *syme)
194 /* 152 /*
195 * We can't annotate with just /proc/kallsyms 153 * We can't annotate with just /proc/kallsyms
196 */ 154 */
197 if (map->dso->origin == DSO__ORIG_KERNEL) 155 if (map->dso->origin == DSO__ORIG_KERNEL) {
156 pr_err("Can't annotate %s: No vmlinux file was found in the "
157 "path\n", sym->name);
158 sleep(1);
198 return -1; 159 return -1;
199
200 if (syme->src == NULL) {
201 syme->src = zalloc(sizeof(*source));
202 if (syme->src == NULL)
203 return -1;
204 pthread_mutex_init(&syme->src->lock, NULL);
205 } 160 }
206 161
207 source = syme->src; 162 notes = symbol__annotation(sym);
208 163 if (notes->src != NULL) {
209 if (source->lines) { 164 pthread_mutex_lock(&notes->lock);
210 pthread_mutex_lock(&source->lock);
211 goto out_assign; 165 goto out_assign;
212 } 166 }
213 path = map->dso->long_name;
214
215 len = sym->end - sym->start;
216
217 sprintf(command,
218 "objdump --start-address=%#0*" PRIx64 " --stop-address=%#0*" PRIx64 " -dS %s",
219 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
220 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
221
222 file = popen(command, "r");
223 if (!file)
224 return -1;
225
226 pthread_mutex_lock(&source->lock);
227 source->lines_tail = &source->lines;
228 while (!feof(file)) {
229 struct source_line *src;
230 size_t dummy = 0;
231 char *c, *sep;
232 167
233 src = malloc(sizeof(struct source_line)); 168 pthread_mutex_lock(&notes->lock);
234 assert(src != NULL);
235 memset(src, 0, sizeof(struct source_line));
236 169
237 if (getline(&src->line, &dummy, file) < 0) 170 if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
238 break; 171 pthread_mutex_unlock(&notes->lock);
239 if (!src->line) 172 pr_err("Not enough memory for annotating '%s' symbol!\n",
240 break; 173 sym->name);
241 174 sleep(1);
242 c = strchr(src->line, '\n'); 175 return err;
243 if (c)
244 *c = 0;
245
246 src->next = NULL;
247 *source->lines_tail = src;
248 source->lines_tail = &src->next;
249
250 src->eip = strtoull(src->line, &sep, 16);
251 if (*sep == ':')
252 src->eip = map__objdump_2ip(map, src->eip);
253 else /* this line has no ip info (e.g. source line) */
254 src->eip = 0;
255 } 176 }
256 pclose(file); 177
178 err = symbol__annotate(sym, syme->map, 0);
179 if (err == 0) {
257out_assign: 180out_assign:
258 sym_filter_entry = syme; 181 top.sym_filter_entry = syme;
259 pthread_mutex_unlock(&source->lock); 182 }
260 return 0; 183
184 pthread_mutex_unlock(&notes->lock);
185 return err;
261} 186}
262 187
263static void __zero_source_counters(struct sym_entry *syme) 188static void __zero_source_counters(struct sym_entry *syme)
264{ 189{
265 int i; 190 struct symbol *sym = sym_entry__symbol(syme);
266 struct source_line *line; 191 symbol__annotate_zero_histograms(sym);
267
268 line = syme->src->lines;
269 while (line) {
270 for (i = 0; i < nr_counters; i++)
271 line->count[i] = 0;
272 line = line->next;
273 }
274} 192}
275 193
276static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 194static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
277{ 195{
278 struct source_line *line; 196 struct annotation *notes;
279 197 struct symbol *sym;
280 if (syme != sym_filter_entry)
281 return;
282 198
283 if (pthread_mutex_trylock(&syme->src->lock)) 199 if (syme != top.sym_filter_entry)
284 return; 200 return;
285 201
286 if (syme->src == NULL || syme->src->source == NULL) 202 sym = sym_entry__symbol(syme);
287 goto out_unlock; 203 notes = symbol__annotation(sym);
288
289 for (line = syme->src->lines; line; line = line->next) {
290 /* skip lines without IP info */
291 if (line->eip == 0)
292 continue;
293 if (line->eip == ip) {
294 line->count[counter]++;
295 break;
296 }
297 if (line->eip > ip)
298 break;
299 }
300out_unlock:
301 pthread_mutex_unlock(&syme->src->lock);
302}
303
304#define PATTERN_LEN (BITS_PER_LONG / 4 + 2)
305
306static void lookup_sym_source(struct sym_entry *syme)
307{
308 struct symbol *symbol = sym_entry__symbol(syme);
309 struct source_line *line;
310 char pattern[PATTERN_LEN + 1];
311
312 sprintf(pattern, "%0*" PRIx64 " <", BITS_PER_LONG / 4,
313 map__rip_2objdump(syme->map, symbol->start));
314
315 pthread_mutex_lock(&syme->src->lock);
316 for (line = syme->src->lines; line; line = line->next) {
317 if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
318 syme->src->source = line;
319 break;
320 }
321 }
322 pthread_mutex_unlock(&syme->src->lock);
323}
324 204
325static void show_lines(struct source_line *queue, int count, int total) 205 if (pthread_mutex_trylock(&notes->lock))
326{ 206 return;
327 int i;
328 struct source_line *line;
329 207
330 line = queue; 208 ip = syme->map->map_ip(syme->map, ip);
331 for (i = 0; i < count; i++) { 209 symbol__inc_addr_samples(sym, syme->map, counter, ip);
332 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
333 210
334 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 211 pthread_mutex_unlock(&notes->lock);
335 line = line->next;
336 }
337} 212}
338 213
339#define TRACE_COUNT 3
340
341static void show_details(struct sym_entry *syme) 214static void show_details(struct sym_entry *syme)
342{ 215{
216 struct annotation *notes;
343 struct symbol *symbol; 217 struct symbol *symbol;
344 struct source_line *line; 218 int more;
345 struct source_line *line_queue = NULL;
346 int displayed = 0;
347 int line_queue_count = 0, total = 0, more = 0;
348 219
349 if (!syme) 220 if (!syme)
350 return; 221 return;
351 222
352 if (!syme->src->source)
353 lookup_sym_source(syme);
354
355 if (!syme->src->source)
356 return;
357
358 symbol = sym_entry__symbol(syme); 223 symbol = sym_entry__symbol(syme);
359 printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name); 224 notes = symbol__annotation(symbol);
360 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
361
362 pthread_mutex_lock(&syme->src->lock);
363 line = syme->src->source;
364 while (line) {
365 total += line->count[sym_counter];
366 line = line->next;
367 }
368
369 line = syme->src->source;
370 while (line) {
371 float pcnt = 0.0;
372
373 if (!line_queue_count)
374 line_queue = line;
375 line_queue_count++;
376
377 if (line->count[sym_counter])
378 pcnt = 100.0 * line->count[sym_counter] / (float)total;
379 if (pcnt >= (float)sym_pcnt_filter) {
380 if (displayed <= print_entries)
381 show_lines(line_queue, line_queue_count, total);
382 else more++;
383 displayed += line_queue_count;
384 line_queue_count = 0;
385 line_queue = NULL;
386 } else if (line_queue_count > TRACE_COUNT) {
387 line_queue = line_queue->next;
388 line_queue_count--;
389 }
390
391 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
392 line = line->next;
393 }
394 pthread_mutex_unlock(&syme->src->lock);
395 if (more)
396 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
397}
398 225
399/* 226 pthread_mutex_lock(&notes->lock);
400 * Symbols will be added here in event__process_sample and will get out
401 * after decayed.
402 */
403static LIST_HEAD(active_symbols);
404static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
405
406/*
407 * Ordering weight: count-1 * count-2 * ... / count-n
408 */
409static double sym_weight(const struct sym_entry *sym)
410{
411 double weight = sym->snap_count;
412 int counter;
413
414 if (!display_weighted)
415 return weight;
416 227
417 for (counter = 1; counter < nr_counters-1; counter++) 228 if (notes->src == NULL)
418 weight *= sym->count[counter]; 229 goto out_unlock;
419 230
420 weight /= (sym->count[counter] + 1); 231 printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
232 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
421 233
422 return weight; 234 more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
235 0, sym_pcnt_filter, top.print_entries, 4);
236 if (top.zero)
237 symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
238 else
239 symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
240 if (more != 0)
241 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
242out_unlock:
243 pthread_mutex_unlock(&notes->lock);
423} 244}
424 245
425static long samples;
426static long kernel_samples, us_samples;
427static long exact_samples;
428static long guest_us_samples, guest_kernel_samples;
429static const char CONSOLE_CLEAR[] = ""; 246static const char CONSOLE_CLEAR[] = "";
430 247
431static void __list_insert_active_sym(struct sym_entry *syme) 248static void __list_insert_active_sym(struct sym_entry *syme)
432{ 249{
433 list_add(&syme->node, &active_symbols); 250 list_add(&syme->node, &top.active_symbols);
434}
435
436static void list_remove_active_sym(struct sym_entry *syme)
437{
438 pthread_mutex_lock(&active_symbols_lock);
439 list_del_init(&syme->node);
440 pthread_mutex_unlock(&active_symbols_lock);
441} 251}
442 252
443static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) 253static void print_sym_table(struct perf_session *session)
444{ 254{
445 struct rb_node **p = &tree->rb_node; 255 char bf[160];
446 struct rb_node *parent = NULL; 256 int printed = 0;
447 struct sym_entry *iter;
448
449 while (*p != NULL) {
450 parent = *p;
451 iter = rb_entry(parent, struct sym_entry, rb_node);
452
453 if (se->weight > iter->weight)
454 p = &(*p)->rb_left;
455 else
456 p = &(*p)->rb_right;
457 }
458
459 rb_link_node(&se->rb_node, parent, p);
460 rb_insert_color(&se->rb_node, tree);
461}
462
463static void print_sym_table(void)
464{
465 int printed = 0, j;
466 struct perf_evsel *counter;
467 int snap = !display_weighted ? sym_counter : 0;
468 float samples_per_sec = samples/delay_secs;
469 float ksamples_per_sec = kernel_samples/delay_secs;
470 float us_samples_per_sec = (us_samples)/delay_secs;
471 float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs;
472 float guest_us_samples_per_sec = (guest_us_samples)/delay_secs;
473 float esamples_percent = (100.0*exact_samples)/samples;
474 float sum_ksamples = 0.0;
475 struct sym_entry *syme, *n;
476 struct rb_root tmp = RB_ROOT;
477 struct rb_node *nd; 257 struct rb_node *nd;
478 int sym_width = 0, dso_width = 0, dso_short_width = 0; 258 struct sym_entry *syme;
259 struct rb_root tmp = RB_ROOT;
479 const int win_width = winsize.ws_col - 1; 260 const int win_width = winsize.ws_col - 1;
480 261 int sym_width, dso_width, dso_short_width;
481 samples = us_samples = kernel_samples = exact_samples = 0; 262 float sum_ksamples = perf_top__decay_samples(&top, &tmp);
482 guest_kernel_samples = guest_us_samples = 0;
483
484 /* Sort the active symbols */
485 pthread_mutex_lock(&active_symbols_lock);
486 syme = list_entry(active_symbols.next, struct sym_entry, node);
487 pthread_mutex_unlock(&active_symbols_lock);
488
489 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
490 syme->snap_count = syme->count[snap];
491 if (syme->snap_count != 0) {
492
493 if ((hide_user_symbols &&
494 syme->origin == PERF_RECORD_MISC_USER) ||
495 (hide_kernel_symbols &&
496 syme->origin == PERF_RECORD_MISC_KERNEL)) {
497 list_remove_active_sym(syme);
498 continue;
499 }
500 syme->weight = sym_weight(syme);
501 rb_insert_active_sym(&tmp, syme);
502 sum_ksamples += syme->snap_count;
503
504 for (j = 0; j < nr_counters; j++)
505 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
506 } else
507 list_remove_active_sym(syme);
508 }
509 263
510 puts(CONSOLE_CLEAR); 264 puts(CONSOLE_CLEAR);
511 265
512 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 266 perf_top__header_snprintf(&top, bf, sizeof(bf));
513 if (!perf_guest) { 267 printf("%s\n", bf);
514 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
515 " exact: %4.1f%% [",
516 samples_per_sec,
517 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
518 samples_per_sec)),
519 esamples_percent);
520 } else {
521 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
522 " guest kernel:%4.1f%% guest us:%4.1f%%"
523 " exact: %4.1f%% [",
524 samples_per_sec,
525 100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) /
526 samples_per_sec)),
527 100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) /
528 samples_per_sec)),
529 100.0 - (100.0 * ((samples_per_sec -
530 guest_kernel_samples_per_sec) /
531 samples_per_sec)),
532 100.0 - (100.0 * ((samples_per_sec -
533 guest_us_samples_per_sec) /
534 samples_per_sec)),
535 esamples_percent);
536 }
537
538 if (nr_counters == 1 || !display_weighted) {
539 struct perf_evsel *first;
540 first = list_entry(evsel_list.next, struct perf_evsel, node);
541 printf("%" PRIu64, (uint64_t)first->attr.sample_period);
542 if (freq)
543 printf("Hz ");
544 else
545 printf(" ");
546 }
547
548 if (!display_weighted)
549 printf("%s", event_name(sym_evsel));
550 else list_for_each_entry(counter, &evsel_list, node) {
551 if (counter->idx)
552 printf("/");
553
554 printf("%s", event_name(counter));
555 }
556 268
557 printf( "], "); 269 perf_top__reset_sample_counters(&top);
558
559 if (target_pid != -1)
560 printf(" (target_pid: %d", target_pid);
561 else if (target_tid != -1)
562 printf(" (target_tid: %d", target_tid);
563 else
564 printf(" (all");
565
566 if (cpu_list)
567 printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
568 else {
569 if (target_tid != -1)
570 printf(")\n");
571 else
572 printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
573 }
574 270
575 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 271 printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
576 272
577 if (sym_filter_entry) { 273 if (session->hists.stats.total_lost != 0) {
578 show_details(sym_filter_entry); 274 color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
579 return; 275 printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
276 session->hists.stats.total_lost);
580 } 277 }
581 278
582 /* 279 if (top.sym_filter_entry) {
583 * Find the longest symbol name that will be displayed 280 show_details(top.sym_filter_entry);
584 */ 281 return;
585 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
586 syme = rb_entry(nd, struct sym_entry, rb_node);
587 if (++printed > print_entries ||
588 (int)syme->snap_count < count_filter)
589 continue;
590
591 if (syme->map->dso->long_name_len > dso_width)
592 dso_width = syme->map->dso->long_name_len;
593
594 if (syme->map->dso->short_name_len > dso_short_width)
595 dso_short_width = syme->map->dso->short_name_len;
596
597 if (syme->name_len > sym_width)
598 sym_width = syme->name_len;
599 } 282 }
600 283
601 printed = 0; 284 perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
285 &sym_width);
602 286
603 if (sym_width + dso_width > winsize.ws_col - 29) { 287 if (sym_width + dso_width > winsize.ws_col - 29) {
604 dso_width = dso_short_width; 288 dso_width = dso_short_width;
@@ -606,7 +290,7 @@ static void print_sym_table(void)
606 sym_width = winsize.ws_col - dso_width - 29; 290 sym_width = winsize.ws_col - dso_width - 29;
607 } 291 }
608 putchar('\n'); 292 putchar('\n');
609 if (nr_counters == 1) 293 if (top.evlist->nr_entries == 1)
610 printf(" samples pcnt"); 294 printf(" samples pcnt");
611 else 295 else
612 printf(" weight samples pcnt"); 296 printf(" weight samples pcnt");
@@ -615,7 +299,7 @@ static void print_sym_table(void)
615 printf(" RIP "); 299 printf(" RIP ");
616 printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); 300 printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
617 printf(" %s _______ _____", 301 printf(" %s _______ _____",
618 nr_counters == 1 ? " " : "______"); 302 top.evlist->nr_entries == 1 ? " " : "______");
619 if (verbose) 303 if (verbose)
620 printf(" ________________"); 304 printf(" ________________");
621 printf(" %-*.*s", sym_width, sym_width, graph_line); 305 printf(" %-*.*s", sym_width, sym_width, graph_line);
@@ -628,13 +312,14 @@ static void print_sym_table(void)
628 312
629 syme = rb_entry(nd, struct sym_entry, rb_node); 313 syme = rb_entry(nd, struct sym_entry, rb_node);
630 sym = sym_entry__symbol(syme); 314 sym = sym_entry__symbol(syme);
631 if (++printed > print_entries || (int)syme->snap_count < count_filter) 315 if (++printed > top.print_entries ||
316 (int)syme->snap_count < top.count_filter)
632 continue; 317 continue;
633 318
634 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 319 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
635 sum_ksamples)); 320 sum_ksamples));
636 321
637 if (nr_counters == 1 || !display_weighted) 322 if (top.evlist->nr_entries == 1 || !top.display_weighted)
638 printf("%20.2f ", syme->weight); 323 printf("%20.2f ", syme->weight);
639 else 324 else
640 printf("%9.1f %10ld ", syme->weight, syme->snap_count); 325 printf("%9.1f %10ld ", syme->weight, syme->snap_count);
@@ -693,10 +378,8 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
693 378
694 /* zero counters of active symbol */ 379 /* zero counters of active symbol */
695 if (syme) { 380 if (syme) {
696 pthread_mutex_lock(&syme->src->lock);
697 __zero_source_counters(syme); 381 __zero_source_counters(syme);
698 *target = NULL; 382 *target = NULL;
699 pthread_mutex_unlock(&syme->src->lock);
700 } 383 }
701 384
702 fprintf(stdout, "\n%s: ", msg); 385 fprintf(stdout, "\n%s: ", msg);
@@ -707,11 +390,11 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
707 if (p) 390 if (p)
708 *p = 0; 391 *p = 0;
709 392
710 pthread_mutex_lock(&active_symbols_lock); 393 pthread_mutex_lock(&top.active_symbols_lock);
711 syme = list_entry(active_symbols.next, struct sym_entry, node); 394 syme = list_entry(top.active_symbols.next, struct sym_entry, node);
712 pthread_mutex_unlock(&active_symbols_lock); 395 pthread_mutex_unlock(&top.active_symbols_lock);
713 396
714 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 397 list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
715 struct symbol *sym = sym_entry__symbol(syme); 398 struct symbol *sym = sym_entry__symbol(syme);
716 399
717 if (!strcmp(buf, sym->name)) { 400 if (!strcmp(buf, sym->name)) {
@@ -735,34 +418,34 @@ static void print_mapped_keys(void)
735{ 418{
736 char *name = NULL; 419 char *name = NULL;
737 420
738 if (sym_filter_entry) { 421 if (top.sym_filter_entry) {
739 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 422 struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
740 name = sym->name; 423 name = sym->name;
741 } 424 }
742 425
743 fprintf(stdout, "\nMapped keys:\n"); 426 fprintf(stdout, "\nMapped keys:\n");
744 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 427 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top.delay_secs);
745 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 428 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries);
746 429
747 if (nr_counters > 1) 430 if (top.evlist->nr_entries > 1)
748 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel)); 431 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top.sym_evsel));
749 432
750 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 433 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter);
751 434
752 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 435 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
753 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 436 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
754 fprintf(stdout, "\t[S] stop annotation.\n"); 437 fprintf(stdout, "\t[S] stop annotation.\n");
755 438
756 if (nr_counters > 1) 439 if (top.evlist->nr_entries > 1)
757 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 440 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);
758 441
759 fprintf(stdout, 442 fprintf(stdout,
760 "\t[K] hide kernel_symbols symbols. \t(%s)\n", 443 "\t[K] hide kernel_symbols symbols. \t(%s)\n",
761 hide_kernel_symbols ? "yes" : "no"); 444 top.hide_kernel_symbols ? "yes" : "no");
762 fprintf(stdout, 445 fprintf(stdout,
763 "\t[U] hide user symbols. \t(%s)\n", 446 "\t[U] hide user symbols. \t(%s)\n",
764 hide_user_symbols ? "yes" : "no"); 447 top.hide_user_symbols ? "yes" : "no");
765 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 448 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0);
766 fprintf(stdout, "\t[qQ] quit.\n"); 449 fprintf(stdout, "\t[qQ] quit.\n");
767} 450}
768 451
@@ -783,7 +466,7 @@ static int key_mapped(int c)
783 return 1; 466 return 1;
784 case 'E': 467 case 'E':
785 case 'w': 468 case 'w':
786 return nr_counters > 1 ? 1 : 0; 469 return top.evlist->nr_entries > 1 ? 1 : 0;
787 default: 470 default:
788 break; 471 break;
789 } 472 }
@@ -818,47 +501,47 @@ static void handle_keypress(struct perf_session *session, int c)
818 501
819 switch (c) { 502 switch (c) {
820 case 'd': 503 case 'd':
821 prompt_integer(&delay_secs, "Enter display delay"); 504 prompt_integer(&top.delay_secs, "Enter display delay");
822 if (delay_secs < 1) 505 if (top.delay_secs < 1)
823 delay_secs = 1; 506 top.delay_secs = 1;
824 break; 507 break;
825 case 'e': 508 case 'e':
826 prompt_integer(&print_entries, "Enter display entries (lines)"); 509 prompt_integer(&top.print_entries, "Enter display entries (lines)");
827 if (print_entries == 0) { 510 if (top.print_entries == 0) {
828 sig_winch_handler(SIGWINCH); 511 sig_winch_handler(SIGWINCH);
829 signal(SIGWINCH, sig_winch_handler); 512 signal(SIGWINCH, sig_winch_handler);
830 } else 513 } else
831 signal(SIGWINCH, SIG_DFL); 514 signal(SIGWINCH, SIG_DFL);
832 break; 515 break;
833 case 'E': 516 case 'E':
834 if (nr_counters > 1) { 517 if (top.evlist->nr_entries > 1) {
835 fprintf(stderr, "\nAvailable events:"); 518 fprintf(stderr, "\nAvailable events:");
836 519
837 list_for_each_entry(sym_evsel, &evsel_list, node) 520 list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
838 fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel)); 521 fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
839 522
840 prompt_integer(&sym_counter, "Enter details event counter"); 523 prompt_integer(&top.sym_counter, "Enter details event counter");
841 524
842 if (sym_counter >= nr_counters) { 525 if (top.sym_counter >= top.evlist->nr_entries) {
843 sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node); 526 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
844 sym_counter = 0; 527 top.sym_counter = 0;
845 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel)); 528 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
846 sleep(1); 529 sleep(1);
847 break; 530 break;
848 } 531 }
849 list_for_each_entry(sym_evsel, &evsel_list, node) 532 list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
850 if (sym_evsel->idx == sym_counter) 533 if (top.sym_evsel->idx == top.sym_counter)
851 break; 534 break;
852 } else sym_counter = 0; 535 } else top.sym_counter = 0;
853 break; 536 break;
854 case 'f': 537 case 'f':
855 prompt_integer(&count_filter, "Enter display event count filter"); 538 prompt_integer(&top.count_filter, "Enter display event count filter");
856 break; 539 break;
857 case 'F': 540 case 'F':
858 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 541 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
859 break; 542 break;
860 case 'K': 543 case 'K':
861 hide_kernel_symbols = !hide_kernel_symbols; 544 top.hide_kernel_symbols = !top.hide_kernel_symbols;
862 break; 545 break;
863 case 'q': 546 case 'q':
864 case 'Q': 547 case 'Q':
@@ -867,34 +550,50 @@ static void handle_keypress(struct perf_session *session, int c)
867 perf_session__fprintf_dsos(session, stderr); 550 perf_session__fprintf_dsos(session, stderr);
868 exit(0); 551 exit(0);
869 case 's': 552 case 's':
870 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 553 prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
871 break; 554 break;
872 case 'S': 555 case 'S':
873 if (!sym_filter_entry) 556 if (!top.sym_filter_entry)
874 break; 557 break;
875 else { 558 else {
876 struct sym_entry *syme = sym_filter_entry; 559 struct sym_entry *syme = top.sym_filter_entry;
877 560
878 pthread_mutex_lock(&syme->src->lock); 561 top.sym_filter_entry = NULL;
879 sym_filter_entry = NULL;
880 __zero_source_counters(syme); 562 __zero_source_counters(syme);
881 pthread_mutex_unlock(&syme->src->lock);
882 } 563 }
883 break; 564 break;
884 case 'U': 565 case 'U':
885 hide_user_symbols = !hide_user_symbols; 566 top.hide_user_symbols = !top.hide_user_symbols;
886 break; 567 break;
887 case 'w': 568 case 'w':
888 display_weighted = ~display_weighted; 569 top.display_weighted = ~top.display_weighted;
889 break; 570 break;
890 case 'z': 571 case 'z':
891 zero = !zero; 572 top.zero = !top.zero;
892 break; 573 break;
893 default: 574 default:
894 break; 575 break;
895 } 576 }
896} 577}
897 578
579static void *display_thread_tui(void *arg __used)
580{
581 int err = 0;
582 pthread_mutex_lock(&top.active_symbols_lock);
583 while (list_empty(&top.active_symbols)) {
584 err = pthread_cond_wait(&top.active_symbols_cond,
585 &top.active_symbols_lock);
586 if (err)
587 break;
588 }
589 pthread_mutex_unlock(&top.active_symbols_lock);
590 if (!err)
591 perf_top__tui_browser(&top);
592 exit_browser(0);
593 exit(0);
594 return NULL;
595}
596
898static void *display_thread(void *arg __used) 597static void *display_thread(void *arg __used)
899{ 598{
900 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 599 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
@@ -909,13 +608,13 @@ static void *display_thread(void *arg __used)
909 tc.c_cc[VTIME] = 0; 608 tc.c_cc[VTIME] = 0;
910 609
911repeat: 610repeat:
912 delay_msecs = delay_secs * 1000; 611 delay_msecs = top.delay_secs * 1000;
913 tcsetattr(0, TCSANOW, &tc); 612 tcsetattr(0, TCSANOW, &tc);
914 /* trash return*/ 613 /* trash return*/
915 getc(stdin); 614 getc(stdin);
916 615
917 do { 616 do {
918 print_sym_table(); 617 print_sym_table(session);
919 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 618 } while (!poll(&stdin_poll, 1, delay_msecs) == 1);
920 619
921 c = getc(stdin); 620 c = getc(stdin);
@@ -930,6 +629,7 @@ repeat:
930/* Tag samples to be skipped. */ 629/* Tag samples to be skipped. */
931static const char *skip_symbols[] = { 630static const char *skip_symbols[] = {
932 "default_idle", 631 "default_idle",
632 "native_safe_halt",
933 "cpu_idle", 633 "cpu_idle",
934 "enter_idle", 634 "enter_idle",
935 "exit_idle", 635 "exit_idle",
@@ -965,9 +665,9 @@ static int symbol_filter(struct map *map, struct symbol *sym)
965 665
966 syme = symbol__priv(sym); 666 syme = symbol__priv(sym);
967 syme->map = map; 667 syme->map = map;
968 syme->src = NULL; 668 symbol__annotate_init(map, sym);
969 669
970 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { 670 if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
971 /* schedule initial sym_filter_entry setup */ 671 /* schedule initial sym_filter_entry setup */
972 sym_filter_entry_sched = syme; 672 sym_filter_entry_sched = syme;
973 sym_filter = NULL; 673 sym_filter = NULL;
@@ -980,44 +680,40 @@ static int symbol_filter(struct map *map, struct symbol *sym)
980 } 680 }
981 } 681 }
982 682
983 if (!syme->skip)
984 syme->name_len = strlen(sym->name);
985
986 return 0; 683 return 0;
987} 684}
988 685
989static void event__process_sample(const event_t *self, 686static void perf_event__process_sample(const union perf_event *event,
990 struct sample_data *sample, 687 struct perf_sample *sample,
991 struct perf_session *session, 688 struct perf_session *session)
992 struct perf_evsel *evsel)
993{ 689{
994 u64 ip = self->ip.ip; 690 u64 ip = event->ip.ip;
995 struct sym_entry *syme; 691 struct sym_entry *syme;
996 struct addr_location al; 692 struct addr_location al;
997 struct machine *machine; 693 struct machine *machine;
998 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 694 u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
999 695
1000 ++samples; 696 ++top.samples;
1001 697
1002 switch (origin) { 698 switch (origin) {
1003 case PERF_RECORD_MISC_USER: 699 case PERF_RECORD_MISC_USER:
1004 ++us_samples; 700 ++top.us_samples;
1005 if (hide_user_symbols) 701 if (top.hide_user_symbols)
1006 return; 702 return;
1007 machine = perf_session__find_host_machine(session); 703 machine = perf_session__find_host_machine(session);
1008 break; 704 break;
1009 case PERF_RECORD_MISC_KERNEL: 705 case PERF_RECORD_MISC_KERNEL:
1010 ++kernel_samples; 706 ++top.kernel_samples;
1011 if (hide_kernel_symbols) 707 if (top.hide_kernel_symbols)
1012 return; 708 return;
1013 machine = perf_session__find_host_machine(session); 709 machine = perf_session__find_host_machine(session);
1014 break; 710 break;
1015 case PERF_RECORD_MISC_GUEST_KERNEL: 711 case PERF_RECORD_MISC_GUEST_KERNEL:
1016 ++guest_kernel_samples; 712 ++top.guest_kernel_samples;
1017 machine = perf_session__find_machine(session, self->ip.pid); 713 machine = perf_session__find_machine(session, event->ip.pid);
1018 break; 714 break;
1019 case PERF_RECORD_MISC_GUEST_USER: 715 case PERF_RECORD_MISC_GUEST_USER:
1020 ++guest_us_samples; 716 ++top.guest_us_samples;
1021 /* 717 /*
1022 * TODO: we don't process guest user from host side 718 * TODO: we don't process guest user from host side
1023 * except simple counting. 719 * except simple counting.
@@ -1029,15 +725,15 @@ static void event__process_sample(const event_t *self,
1029 725
1030 if (!machine && perf_guest) { 726 if (!machine && perf_guest) {
1031 pr_err("Can't find guest [%d]'s kernel information\n", 727 pr_err("Can't find guest [%d]'s kernel information\n",
1032 self->ip.pid); 728 event->ip.pid);
1033 return; 729 return;
1034 } 730 }
1035 731
1036 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 732 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
1037 exact_samples++; 733 top.exact_samples++;
1038 734
1039 if (event__preprocess_sample(self, session, &al, sample, 735 if (perf_event__preprocess_sample(event, session, &al, sample,
1040 symbol_filter) < 0 || 736 symbol_filter) < 0 ||
1041 al.filtered) 737 al.filtered)
1042 return; 738 return;
1043 739
@@ -1055,8 +751,9 @@ static void event__process_sample(const event_t *self,
1055 */ 751 */
1056 if (al.map == machine->vmlinux_maps[MAP__FUNCTION] && 752 if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
1057 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { 753 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
1058 pr_err("The %s file can't be used\n", 754 ui__warning("The %s file can't be used\n",
1059 symbol_conf.vmlinux_name); 755 symbol_conf.vmlinux_name);
756 exit_browser(0);
1060 exit(1); 757 exit(1);
1061 } 758 }
1062 759
@@ -1065,13 +762,13 @@ static void event__process_sample(const event_t *self,
1065 762
1066 /* let's see, whether we need to install initial sym_filter_entry */ 763 /* let's see, whether we need to install initial sym_filter_entry */
1067 if (sym_filter_entry_sched) { 764 if (sym_filter_entry_sched) {
1068 sym_filter_entry = sym_filter_entry_sched; 765 top.sym_filter_entry = sym_filter_entry_sched;
1069 sym_filter_entry_sched = NULL; 766 sym_filter_entry_sched = NULL;
1070 if (parse_source(sym_filter_entry) < 0) { 767 if (parse_source(top.sym_filter_entry) < 0) {
1071 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 768 struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
1072 769
1073 pr_err("Can't annotate %s", sym->name); 770 pr_err("Can't annotate %s", sym->name);
1074 if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { 771 if (top.sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
1075 pr_err(": No vmlinux file was found in the path:\n"); 772 pr_err(": No vmlinux file was found in the path:\n");
1076 machine__fprintf_vmlinux_path(machine, stderr); 773 machine__fprintf_vmlinux_path(machine, stderr);
1077 } else 774 } else
@@ -1082,166 +779,73 @@ static void event__process_sample(const event_t *self,
1082 779
1083 syme = symbol__priv(al.sym); 780 syme = symbol__priv(al.sym);
1084 if (!syme->skip) { 781 if (!syme->skip) {
1085 syme->count[evsel->idx]++; 782 struct perf_evsel *evsel;
783
1086 syme->origin = origin; 784 syme->origin = origin;
785 evsel = perf_evlist__id2evsel(top.evlist, sample->id);
786 assert(evsel != NULL);
787 syme->count[evsel->idx]++;
1087 record_precise_ip(syme, evsel->idx, ip); 788 record_precise_ip(syme, evsel->idx, ip);
1088 pthread_mutex_lock(&active_symbols_lock); 789 pthread_mutex_lock(&top.active_symbols_lock);
1089 if (list_empty(&syme->node) || !syme->node.next) 790 if (list_empty(&syme->node) || !syme->node.next) {
791 static bool first = true;
1090 __list_insert_active_sym(syme); 792 __list_insert_active_sym(syme);
1091 pthread_mutex_unlock(&active_symbols_lock); 793 if (first) {
794 pthread_cond_broadcast(&top.active_symbols_cond);
795 first = false;
796 }
797 }
798 pthread_mutex_unlock(&top.active_symbols_lock);
1092 } 799 }
1093} 800}
1094 801
1095struct mmap_data { 802static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
1096 void *base;
1097 int mask;
1098 unsigned int prev;
1099};
1100
1101static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
1102 int ncpus, int nthreads)
1103{
1104 evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
1105 return evsel->priv != NULL ? 0 : -ENOMEM;
1106}
1107
1108static void perf_evsel__free_mmap(struct perf_evsel *evsel)
1109{
1110 xyarray__delete(evsel->priv);
1111 evsel->priv = NULL;
1112}
1113
1114static unsigned int mmap_read_head(struct mmap_data *md)
1115{
1116 struct perf_event_mmap_page *pc = md->base;
1117 int head;
1118
1119 head = pc->data_head;
1120 rmb();
1121
1122 return head;
1123}
1124
1125static void perf_session__mmap_read_counter(struct perf_session *self,
1126 struct perf_evsel *evsel,
1127 int cpu, int thread_idx)
1128{ 803{
1129 struct xyarray *mmap_array = evsel->priv; 804 struct perf_sample sample;
1130 struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx); 805 union perf_event *event;
1131 unsigned int head = mmap_read_head(md);
1132 unsigned int old = md->prev;
1133 unsigned char *data = md->base + page_size;
1134 struct sample_data sample;
1135 int diff;
1136
1137 /*
1138 * If we're further behind than half the buffer, there's a chance
1139 * the writer will bite our tail and mess up the samples under us.
1140 *
1141 * If we somehow ended up ahead of the head, we got messed up.
1142 *
1143 * In either case, truncate and restart at head.
1144 */
1145 diff = head - old;
1146 if (diff > md->mask / 2 || diff < 0) {
1147 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
1148
1149 /*
1150 * head points to a known good entry, start there.
1151 */
1152 old = head;
1153 }
1154
1155 for (; old != head;) {
1156 event_t *event = (event_t *)&data[old & md->mask];
1157
1158 event_t event_copy;
1159 806
1160 size_t size = event->header.size; 807 while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) {
808 perf_session__parse_sample(self, event, &sample);
1161 809
1162 /*
1163 * Event straddles the mmap boundary -- header should always
1164 * be inside due to u64 alignment of output.
1165 */
1166 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1167 unsigned int offset = old;
1168 unsigned int len = min(sizeof(*event), size), cpy;
1169 void *dst = &event_copy;
1170
1171 do {
1172 cpy = min(md->mask + 1 - (offset & md->mask), len);
1173 memcpy(dst, &data[offset & md->mask], cpy);
1174 offset += cpy;
1175 dst += cpy;
1176 len -= cpy;
1177 } while (len);
1178
1179 event = &event_copy;
1180 }
1181
1182 event__parse_sample(event, self, &sample);
1183 if (event->header.type == PERF_RECORD_SAMPLE) 810 if (event->header.type == PERF_RECORD_SAMPLE)
1184 event__process_sample(event, &sample, self, evsel); 811 perf_event__process_sample(event, &sample, self);
1185 else 812 else
1186 event__process(event, &sample, self); 813 perf_event__process(event, &sample, self);
1187 old += size;
1188 } 814 }
1189
1190 md->prev = old;
1191} 815}
1192 816
1193static struct pollfd *event_array;
1194
1195static void perf_session__mmap_read(struct perf_session *self) 817static void perf_session__mmap_read(struct perf_session *self)
1196{ 818{
1197 struct perf_evsel *counter; 819 int i;
1198 int i, thread_index;
1199
1200 for (i = 0; i < cpus->nr; i++) {
1201 list_for_each_entry(counter, &evsel_list, node) {
1202 for (thread_index = 0;
1203 thread_index < threads->nr;
1204 thread_index++) {
1205 perf_session__mmap_read_counter(self,
1206 counter, i, thread_index);
1207 }
1208 }
1209 }
1210}
1211 820
1212int nr_poll; 821 for (i = 0; i < top.evlist->cpus->nr; i++)
1213int group_fd; 822 perf_session__mmap_read_cpu(self, i);
823}
1214 824
1215static void start_counter(int i, struct perf_evsel *evsel) 825static void start_counters(struct perf_evlist *evlist)
1216{ 826{
1217 struct xyarray *mmap_array = evsel->priv; 827 struct perf_evsel *counter;
1218 struct mmap_data *mm;
1219 struct perf_event_attr *attr;
1220 int cpu = -1;
1221 int thread_index;
1222
1223 if (target_tid == -1)
1224 cpu = cpus->map[i];
1225 828
1226 attr = &evsel->attr; 829 list_for_each_entry(counter, &evlist->entries, node) {
830 struct perf_event_attr *attr = &counter->attr;
1227 831
1228 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 832 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
1229 833
1230 if (freq) { 834 if (top.freq) {
1231 attr->sample_type |= PERF_SAMPLE_PERIOD; 835 attr->sample_type |= PERF_SAMPLE_PERIOD;
1232 attr->freq = 1; 836 attr->freq = 1;
1233 attr->sample_freq = freq; 837 attr->sample_freq = top.freq;
1234 } 838 }
1235 839
1236 attr->inherit = (cpu < 0) && inherit; 840 if (evlist->nr_entries > 1) {
1237 attr->mmap = 1; 841 attr->sample_type |= PERF_SAMPLE_ID;
842 attr->read_format |= PERF_FORMAT_ID;
843 }
1238 844
1239 for (thread_index = 0; thread_index < threads->nr; thread_index++) { 845 attr->mmap = 1;
1240try_again: 846try_again:
1241 FD(evsel, i, thread_index) = sys_perf_event_open(attr, 847 if (perf_evsel__open(counter, top.evlist->cpus,
1242 threads->map[thread_index], cpu, group_fd, 0); 848 top.evlist->threads, group, inherit) < 0) {
1243
1244 if (FD(evsel, i, thread_index) < 0) {
1245 int err = errno; 849 int err = errno;
1246 850
1247 if (err == EPERM || err == EACCES) 851 if (err == EPERM || err == EACCES)
@@ -1253,8 +857,8 @@ try_again:
1253 * based cpu-clock-tick sw counter, which 857 * based cpu-clock-tick sw counter, which
1254 * is always available even if no PMU support: 858 * is always available even if no PMU support:
1255 */ 859 */
1256 if (attr->type == PERF_TYPE_HARDWARE 860 if (attr->type == PERF_TYPE_HARDWARE &&
1257 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 861 attr->config == PERF_COUNT_HW_CPU_CYCLES) {
1258 862
1259 if (verbose) 863 if (verbose)
1260 warning(" ... trying to fall back to cpu-clock-ticks\n"); 864 warning(" ... trying to fall back to cpu-clock-ticks\n");
@@ -1264,39 +868,22 @@ try_again:
1264 goto try_again; 868 goto try_again;
1265 } 869 }
1266 printf("\n"); 870 printf("\n");
1267 error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n", 871 error("sys_perf_event_open() syscall returned with %d "
1268 FD(evsel, i, thread_index), strerror(err)); 872 "(%s). /bin/dmesg may provide additional information.\n",
873 err, strerror(err));
1269 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 874 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1270 exit(-1); 875 exit(-1);
1271 } 876 }
1272 assert(FD(evsel, i, thread_index) >= 0);
1273 fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
1274
1275 /*
1276 * First counter acts as the group leader:
1277 */
1278 if (group && group_fd == -1)
1279 group_fd = FD(evsel, i, thread_index);
1280
1281 event_array[nr_poll].fd = FD(evsel, i, thread_index);
1282 event_array[nr_poll].events = POLLIN;
1283 nr_poll++;
1284
1285 mm = xyarray__entry(mmap_array, i, thread_index);
1286 mm->prev = 0;
1287 mm->mask = mmap_pages*page_size - 1;
1288 mm->base = mmap(NULL, (mmap_pages+1)*page_size,
1289 PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
1290 if (mm->base == MAP_FAILED)
1291 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1292 } 877 }
878
879 if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
880 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1293} 881}
1294 882
1295static int __cmd_top(void) 883static int __cmd_top(void)
1296{ 884{
1297 pthread_t thread; 885 pthread_t thread;
1298 struct perf_evsel *counter; 886 int ret __used;
1299 int i, ret;
1300 /* 887 /*
1301 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this 888 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
1302 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now. 889 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
@@ -1305,23 +892,23 @@ static int __cmd_top(void)
1305 if (session == NULL) 892 if (session == NULL)
1306 return -ENOMEM; 893 return -ENOMEM;
1307 894
1308 if (target_tid != -1) 895 if (top.target_tid != -1)
1309 event__synthesize_thread_map(threads, event__process, session); 896 perf_event__synthesize_thread_map(top.evlist->threads,
897 perf_event__process, session);
1310 else 898 else
1311 event__synthesize_threads(event__process, session); 899 perf_event__synthesize_threads(perf_event__process, session);
1312 900
1313 for (i = 0; i < cpus->nr; i++) { 901 start_counters(top.evlist);
1314 group_fd = -1; 902 session->evlist = top.evlist;
1315 list_for_each_entry(counter, &evsel_list, node) 903 perf_session__update_sample_type(session);
1316 start_counter(i, counter);
1317 }
1318 904
1319 /* Wait for a minimal set of events before starting the snapshot */ 905 /* Wait for a minimal set of events before starting the snapshot */
1320 poll(&event_array[0], nr_poll, 100); 906 poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
1321 907
1322 perf_session__mmap_read(session); 908 perf_session__mmap_read(session);
1323 909
1324 if (pthread_create(&thread, NULL, display_thread, session)) { 910 if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
911 display_thread), session)) {
1325 printf("Could not create display thread.\n"); 912 printf("Could not create display thread.\n");
1326 exit(-1); 913 exit(-1);
1327 } 914 }
@@ -1337,12 +924,12 @@ static int __cmd_top(void)
1337 } 924 }
1338 925
1339 while (1) { 926 while (1) {
1340 int hits = samples; 927 u64 hits = top.samples;
1341 928
1342 perf_session__mmap_read(session); 929 perf_session__mmap_read(session);
1343 930
1344 if (hits == samples) 931 if (hits == top.samples)
1345 ret = poll(event_array, nr_poll, 100); 932 ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
1346 } 933 }
1347 934
1348 return 0; 935 return 0;
@@ -1354,31 +941,31 @@ static const char * const top_usage[] = {
1354}; 941};
1355 942
1356static const struct option options[] = { 943static const struct option options[] = {
1357 OPT_CALLBACK('e', "event", NULL, "event", 944 OPT_CALLBACK('e', "event", &top.evlist, "event",
1358 "event selector. use 'perf list' to list available events", 945 "event selector. use 'perf list' to list available events",
1359 parse_events), 946 parse_events),
1360 OPT_INTEGER('c', "count", &default_interval, 947 OPT_INTEGER('c', "count", &default_interval,
1361 "event period to sample"), 948 "event period to sample"),
1362 OPT_INTEGER('p', "pid", &target_pid, 949 OPT_INTEGER('p', "pid", &top.target_pid,
1363 "profile events on existing process id"), 950 "profile events on existing process id"),
1364 OPT_INTEGER('t', "tid", &target_tid, 951 OPT_INTEGER('t', "tid", &top.target_tid,
1365 "profile events on existing thread id"), 952 "profile events on existing thread id"),
1366 OPT_BOOLEAN('a', "all-cpus", &system_wide, 953 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1367 "system-wide collection from all CPUs"), 954 "system-wide collection from all CPUs"),
1368 OPT_STRING('C', "cpu", &cpu_list, "cpu", 955 OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
1369 "list of cpus to monitor"), 956 "list of cpus to monitor"),
1370 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 957 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1371 "file", "vmlinux pathname"), 958 "file", "vmlinux pathname"),
1372 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, 959 OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1373 "hide kernel symbols"), 960 "hide kernel symbols"),
1374 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 961 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
1375 OPT_INTEGER('r', "realtime", &realtime_prio, 962 OPT_INTEGER('r', "realtime", &realtime_prio,
1376 "collect data with this RT SCHED_FIFO priority"), 963 "collect data with this RT SCHED_FIFO priority"),
1377 OPT_INTEGER('d', "delay", &delay_secs, 964 OPT_INTEGER('d', "delay", &top.delay_secs,
1378 "number of seconds to delay between refreshes"), 965 "number of seconds to delay between refreshes"),
1379 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 966 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
1380 "dump the symbol table used for profiling"), 967 "dump the symbol table used for profiling"),
1381 OPT_INTEGER('f', "count-filter", &count_filter, 968 OPT_INTEGER('f', "count-filter", &top.count_filter,
1382 "only display functions with more events than this"), 969 "only display functions with more events than this"),
1383 OPT_BOOLEAN('g', "group", &group, 970 OPT_BOOLEAN('g', "group", &group,
1384 "put the counters into a counter group"), 971 "put the counters into a counter group"),
@@ -1386,14 +973,16 @@ static const struct option options[] = {
1386 "child tasks inherit counters"), 973 "child tasks inherit counters"),
1387 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 974 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1388 "symbol to annotate"), 975 "symbol to annotate"),
1389 OPT_BOOLEAN('z', "zero", &zero, 976 OPT_BOOLEAN('z', "zero", &top.zero,
1390 "zero history across updates"), 977 "zero history across updates"),
1391 OPT_INTEGER('F', "freq", &freq, 978 OPT_INTEGER('F', "freq", &top.freq,
1392 "profile at this frequency"), 979 "profile at this frequency"),
1393 OPT_INTEGER('E', "entries", &print_entries, 980 OPT_INTEGER('E', "entries", &top.print_entries,
1394 "display this many functions"), 981 "display this many functions"),
1395 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, 982 OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1396 "hide user symbols"), 983 "hide user symbols"),
984 OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
985 OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
1397 OPT_INCR('v', "verbose", &verbose, 986 OPT_INCR('v', "verbose", &verbose,
1398 "be more verbose (show counter open errors, etc)"), 987 "be more verbose (show counter open errors, etc)"),
1399 OPT_END() 988 OPT_END()
@@ -1404,64 +993,68 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1404 struct perf_evsel *pos; 993 struct perf_evsel *pos;
1405 int status = -ENOMEM; 994 int status = -ENOMEM;
1406 995
996 top.evlist = perf_evlist__new(NULL, NULL);
997 if (top.evlist == NULL)
998 return -ENOMEM;
999
1407 page_size = sysconf(_SC_PAGE_SIZE); 1000 page_size = sysconf(_SC_PAGE_SIZE);
1408 1001
1409 argc = parse_options(argc, argv, options, top_usage, 0); 1002 argc = parse_options(argc, argv, options, top_usage, 0);
1410 if (argc) 1003 if (argc)
1411 usage_with_options(top_usage, options); 1004 usage_with_options(top_usage, options);
1412 1005
1413 if (target_pid != -1) 1006 /*
1414 target_tid = target_pid; 1007 * XXX For now start disabled, only using TUI if explicitely asked for.
1008 * Change that when handle_keys equivalent gets written, live annotation
1009 * done, etc.
1010 */
1011 use_browser = 0;
1415 1012
1416 threads = thread_map__new(target_pid, target_tid); 1013 if (use_stdio)
1417 if (threads == NULL) { 1014 use_browser = 0;
1418 pr_err("Problems finding threads of monitor\n"); 1015 else if (use_tui)
1419 usage_with_options(top_usage, options); 1016 use_browser = 1;
1420 }
1421 1017
1422 event_array = malloc((sizeof(struct pollfd) * 1018 setup_browser(false);
1423 MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
1424 if (!event_array)
1425 return -ENOMEM;
1426 1019
1427 /* CPU and PID are mutually exclusive */ 1020 /* CPU and PID are mutually exclusive */
1428 if (target_tid > 0 && cpu_list) { 1021 if (top.target_tid > 0 && top.cpu_list) {
1429 printf("WARNING: PID switch overriding CPU\n"); 1022 printf("WARNING: PID switch overriding CPU\n");
1430 sleep(1); 1023 sleep(1);
1431 cpu_list = NULL; 1024 top.cpu_list = NULL;
1432 } 1025 }
1433 1026
1434 if (!nr_counters && perf_evsel_list__create_default() < 0) { 1027 if (top.target_pid != -1)
1028 top.target_tid = top.target_pid;
1029
1030 if (perf_evlist__create_maps(top.evlist, top.target_pid,
1031 top.target_tid, top.cpu_list) < 0)
1032 usage_with_options(top_usage, options);
1033
1034 if (!top.evlist->nr_entries &&
1035 perf_evlist__add_default(top.evlist) < 0) {
1435 pr_err("Not enough memory for event selector list\n"); 1036 pr_err("Not enough memory for event selector list\n");
1436 return -ENOMEM; 1037 return -ENOMEM;
1437 } 1038 }
1438 1039
1439 if (delay_secs < 1) 1040 if (top.delay_secs < 1)
1440 delay_secs = 1; 1041 top.delay_secs = 1;
1441 1042
1442 /* 1043 /*
1443 * User specified count overrides default frequency. 1044 * User specified count overrides default frequency.
1444 */ 1045 */
1445 if (default_interval) 1046 if (default_interval)
1446 freq = 0; 1047 top.freq = 0;
1447 else if (freq) { 1048 else if (top.freq) {
1448 default_interval = freq; 1049 default_interval = top.freq;
1449 } else { 1050 } else {
1450 fprintf(stderr, "frequency and count are zero, aborting\n"); 1051 fprintf(stderr, "frequency and count are zero, aborting\n");
1451 exit(EXIT_FAILURE); 1052 exit(EXIT_FAILURE);
1452 } 1053 }
1453 1054
1454 if (target_tid != -1) 1055 list_for_each_entry(pos, &top.evlist->entries, node) {
1455 cpus = cpu_map__dummy_new(); 1056 if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
1456 else 1057 top.evlist->threads->nr) < 0)
1457 cpus = cpu_map__new(cpu_list);
1458
1459 if (cpus == NULL)
1460 usage_with_options(top_usage, options);
1461
1462 list_for_each_entry(pos, &evsel_list, node) {
1463 if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
1464 perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
1465 goto out_free_fd; 1058 goto out_free_fd;
1466 /* 1059 /*
1467 * Fill in the ones not specifically initialized via -c: 1060 * Fill in the ones not specifically initialized via -c:
@@ -1472,26 +1065,28 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1472 pos->attr.sample_period = default_interval; 1065 pos->attr.sample_period = default_interval;
1473 } 1066 }
1474 1067
1475 sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node); 1068 if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
1069 perf_evlist__alloc_mmap(top.evlist) < 0)
1070 goto out_free_fd;
1071
1072 top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
1476 1073
1477 symbol_conf.priv_size = (sizeof(struct sym_entry) + 1074 symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
1478 (nr_counters + 1) * sizeof(unsigned long)); 1075 (top.evlist->nr_entries + 1) * sizeof(unsigned long));
1479 1076
1480 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); 1077 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
1481 if (symbol__init() < 0) 1078 if (symbol__init() < 0)
1482 return -1; 1079 return -1;
1483 1080
1484 get_term_dimensions(&winsize); 1081 get_term_dimensions(&winsize);
1485 if (print_entries == 0) { 1082 if (top.print_entries == 0) {
1486 update_print_entries(&winsize); 1083 update_print_entries(&winsize);
1487 signal(SIGWINCH, sig_winch_handler); 1084 signal(SIGWINCH, sig_winch_handler);
1488 } 1085 }
1489 1086
1490 status = __cmd_top(); 1087 status = __cmd_top();
1491out_free_fd: 1088out_free_fd:
1492 list_for_each_entry(pos, &evsel_list, node) 1089 perf_evlist__delete(top.evlist);
1493 perf_evsel__free_mmap(pos);
1494 perf_evsel_list__delete();
1495 1090
1496 return status; 1091 return status;
1497} 1092}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 95aaf565c704..a5fc660c1f12 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -94,6 +94,32 @@ void get_term_dimensions(struct winsize *ws);
94#include "util/types.h" 94#include "util/types.h"
95#include <stdbool.h> 95#include <stdbool.h>
96 96
97struct perf_mmap {
98 void *base;
99 int mask;
100 unsigned int prev;
101};
102
103static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
104{
105 struct perf_event_mmap_page *pc = mm->base;
106 int head = pc->data_head;
107 rmb();
108 return head;
109}
110
111static inline void perf_mmap__write_tail(struct perf_mmap *md,
112 unsigned long tail)
113{
114 struct perf_event_mmap_page *pc = md->base;
115
116 /*
117 * ensure all reads are done before we write the tail out.
118 */
119 /* mb(); */
120 pc->data_tail = tail;
121}
122
97/* 123/*
98 * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all 124 * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
99 * counters in the current task. 125 * counters in the current task.
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
new file mode 100755
index 000000000000..df638c438a9f
--- /dev/null
+++ b/tools/perf/python/twatch.py
@@ -0,0 +1,41 @@
1#! /usr/bin/python
2# -*- python -*-
3# -*- coding: utf-8 -*-
4# twatch - Experimental use of the perf python interface
5# Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com>
6#
7# This application is free software; you can redistribute it and/or
8# modify it under the terms of the GNU General Public License
9# as published by the Free Software Foundation; version 2.
10#
11# This application is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14# General Public License for more details.
15
16import perf
17
18def main():
19 cpus = perf.cpu_map()
20 threads = perf.thread_map()
21 evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
22 wakeup_events = 1, sample_period = 1,
23 sample_id_all = 1,
24 sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
25 evsel.open(cpus = cpus, threads = threads);
26 evlist = perf.evlist(cpus, threads)
27 evlist.add(evsel)
28 evlist.mmap()
29 while True:
30 evlist.poll(timeout = -1)
31 for cpu in cpus:
32 event = evlist.read_on_cpu(cpu)
33 if not event:
34 continue
35 print "cpu: %2d, pid: %4d, tid: %4d" % (event.sample_cpu,
36 event.sample_pid,
37 event.sample_tid),
38 print event
39
40if __name__ == '__main__':
41 main()
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
new file mode 100644
index 000000000000..0d0830c98cd7
--- /dev/null
+++ b/tools/perf/util/annotate.c
@@ -0,0 +1,605 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-annotate.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9
10#include "util.h"
11#include "build-id.h"
12#include "color.h"
13#include "cache.h"
14#include "symbol.h"
15#include "debug.h"
16#include "annotate.h"
17#include <pthread.h>
18
19int symbol__annotate_init(struct map *map __used, struct symbol *sym)
20{
21 struct annotation *notes = symbol__annotation(sym);
22 pthread_mutex_init(&notes->lock, NULL);
23 return 0;
24}
25
26int symbol__alloc_hist(struct symbol *sym, int nevents)
27{
28 struct annotation *notes = symbol__annotation(sym);
29 size_t sizeof_sym_hist = (sizeof(struct sym_hist) +
30 (sym->end - sym->start) * sizeof(u64));
31
32 notes->src = zalloc(sizeof(*notes->src) + nevents * sizeof_sym_hist);
33 if (notes->src == NULL)
34 return -1;
35 notes->src->sizeof_sym_hist = sizeof_sym_hist;
36 notes->src->nr_histograms = nevents;
37 INIT_LIST_HEAD(&notes->src->source);
38 return 0;
39}
40
41void symbol__annotate_zero_histograms(struct symbol *sym)
42{
43 struct annotation *notes = symbol__annotation(sym);
44
45 pthread_mutex_lock(&notes->lock);
46 if (notes->src != NULL)
47 memset(notes->src->histograms, 0,
48 notes->src->nr_histograms * notes->src->sizeof_sym_hist);
49 pthread_mutex_unlock(&notes->lock);
50}
51
52int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
53 int evidx, u64 addr)
54{
55 unsigned offset;
56 struct annotation *notes;
57 struct sym_hist *h;
58
59 notes = symbol__annotation(sym);
60 if (notes->src == NULL)
61 return -ENOMEM;
62
63 pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
64
65 if (addr >= sym->end)
66 return 0;
67
68 offset = addr - sym->start;
69 h = annotation__histogram(notes, evidx);
70 h->sum++;
71 h->addr[offset]++;
72
73 pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
74 ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name,
75 addr, addr - sym->start, evidx, h->addr[offset]);
76 return 0;
77}
78
79static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
80{
81 struct objdump_line *self = malloc(sizeof(*self) + privsize);
82
83 if (self != NULL) {
84 self->offset = offset;
85 self->line = line;
86 }
87
88 return self;
89}
90
91void objdump_line__free(struct objdump_line *self)
92{
93 free(self->line);
94 free(self);
95}
96
97static void objdump__add_line(struct list_head *head, struct objdump_line *line)
98{
99 list_add_tail(&line->node, head);
100}
101
102struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
103 struct objdump_line *pos)
104{
105 list_for_each_entry_continue(pos, head, node)
106 if (pos->offset >= 0)
107 return pos;
108
109 return NULL;
110}
111
112static int objdump_line__print(struct objdump_line *oline, struct symbol *sym,
113 int evidx, u64 len, int min_pcnt,
114 int printed, int max_lines,
115 struct objdump_line *queue)
116{
117 static const char *prev_line;
118 static const char *prev_color;
119
120 if (oline->offset != -1) {
121 const char *path = NULL;
122 unsigned int hits = 0;
123 double percent = 0.0;
124 const char *color;
125 struct annotation *notes = symbol__annotation(sym);
126 struct source_line *src_line = notes->src->lines;
127 struct sym_hist *h = annotation__histogram(notes, evidx);
128 s64 offset = oline->offset;
129 struct objdump_line *next;
130
131 next = objdump__get_next_ip_line(&notes->src->source, oline);
132
133 while (offset < (s64)len &&
134 (next == NULL || offset < next->offset)) {
135 if (src_line) {
136 if (path == NULL)
137 path = src_line[offset].path;
138 percent += src_line[offset].percent;
139 } else
140 hits += h->addr[offset];
141
142 ++offset;
143 }
144
145 if (src_line == NULL && h->sum)
146 percent = 100.0 * hits / h->sum;
147
148 if (percent < min_pcnt)
149 return -1;
150
151 if (max_lines && printed >= max_lines)
152 return 1;
153
154 if (queue != NULL) {
155 list_for_each_entry_from(queue, &notes->src->source, node) {
156 if (queue == oline)
157 break;
158 objdump_line__print(queue, sym, evidx, len,
159 0, 0, 1, NULL);
160 }
161 }
162
163 color = get_percent_color(percent);
164
165 /*
166 * Also color the filename and line if needed, with
167 * the same color than the percentage. Don't print it
168 * twice for close colored addr with the same filename:line
169 */
170 if (path) {
171 if (!prev_line || strcmp(prev_line, path)
172 || color != prev_color) {
173 color_fprintf(stdout, color, " %s", path);
174 prev_line = path;
175 prev_color = color;
176 }
177 }
178
179 color_fprintf(stdout, color, " %7.2f", percent);
180 printf(" : ");
181 color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line);
182 } else if (max_lines && printed >= max_lines)
183 return 1;
184 else {
185 if (queue)
186 return -1;
187
188 if (!*oline->line)
189 printf(" :\n");
190 else
191 printf(" : %s\n", oline->line);
192 }
193
194 return 0;
195}
196
197static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
198 FILE *file, size_t privsize)
199{
200 struct annotation *notes = symbol__annotation(sym);
201 struct objdump_line *objdump_line;
202 char *line = NULL, *tmp, *tmp2, *c;
203 size_t line_len;
204 s64 line_ip, offset = -1;
205
206 if (getline(&line, &line_len, file) < 0)
207 return -1;
208
209 if (!line)
210 return -1;
211
212 while (line_len != 0 && isspace(line[line_len - 1]))
213 line[--line_len] = '\0';
214
215 c = strchr(line, '\n');
216 if (c)
217 *c = 0;
218
219 line_ip = -1;
220
221 /*
222 * Strip leading spaces:
223 */
224 tmp = line;
225 while (*tmp) {
226 if (*tmp != ' ')
227 break;
228 tmp++;
229 }
230
231 if (*tmp) {
232 /*
233 * Parse hexa addresses followed by ':'
234 */
235 line_ip = strtoull(tmp, &tmp2, 16);
236 if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
237 line_ip = -1;
238 }
239
240 if (line_ip != -1) {
241 u64 start = map__rip_2objdump(map, sym->start),
242 end = map__rip_2objdump(map, sym->end);
243
244 offset = line_ip - start;
245 if (offset < 0 || (u64)line_ip > end)
246 offset = -1;
247 }
248
249 objdump_line = objdump_line__new(offset, line, privsize);
250 if (objdump_line == NULL) {
251 free(line);
252 return -1;
253 }
254 objdump__add_line(&notes->src->source, objdump_line);
255
256 return 0;
257}
258
259int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
260{
261 struct dso *dso = map->dso;
262 char *filename = dso__build_id_filename(dso, NULL, 0);
263 bool free_filename = true;
264 char command[PATH_MAX * 2];
265 FILE *file;
266 int err = 0;
267 char symfs_filename[PATH_MAX];
268
269 if (filename) {
270 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
271 symbol_conf.symfs, filename);
272 }
273
274 if (filename == NULL) {
275 if (dso->has_build_id) {
276 pr_err("Can't annotate %s: not enough memory\n",
277 sym->name);
278 return -ENOMEM;
279 }
280 goto fallback;
281 } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
282 strstr(command, "[kernel.kallsyms]") ||
283 access(symfs_filename, R_OK)) {
284 free(filename);
285fallback:
286 /*
287 * If we don't have build-ids or the build-id file isn't in the
288 * cache, or is just a kallsyms file, well, lets hope that this
289 * DSO is the same as when 'perf record' ran.
290 */
291 filename = dso->long_name;
292 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
293 symbol_conf.symfs, filename);
294 free_filename = false;
295 }
296
297 if (dso->origin == DSO__ORIG_KERNEL) {
298 char bf[BUILD_ID_SIZE * 2 + 16] = " with build id ";
299 char *build_id_msg = NULL;
300
301 if (dso->annotate_warned)
302 goto out_free_filename;
303
304 if (dso->has_build_id) {
305 build_id__sprintf(dso->build_id,
306 sizeof(dso->build_id), bf + 15);
307 build_id_msg = bf;
308 }
309 err = -ENOENT;
310 dso->annotate_warned = 1;
311 pr_err("Can't annotate %s: No vmlinux file%s was found in the "
312 "path.\nPlease use 'perf buildid-cache -av vmlinux' or "
313 "--vmlinux vmlinux.\n",
314 sym->name, build_id_msg ?: "");
315 goto out_free_filename;
316 }
317
318 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
319 filename, sym->name, map->unmap_ip(map, sym->start),
320 map->unmap_ip(map, sym->end));
321
322 pr_debug("annotating [%p] %30s : [%p] %30s\n",
323 dso, dso->long_name, sym, sym->name);
324
325 snprintf(command, sizeof(command),
326 "objdump --start-address=0x%016" PRIx64
327 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
328 map__rip_2objdump(map, sym->start),
329 map__rip_2objdump(map, sym->end),
330 symfs_filename, filename);
331
332 pr_debug("Executing: %s\n", command);
333
334 file = popen(command, "r");
335 if (!file)
336 goto out_free_filename;
337
338 while (!feof(file))
339 if (symbol__parse_objdump_line(sym, map, file, privsize) < 0)
340 break;
341
342 pclose(file);
343out_free_filename:
344 if (free_filename)
345 free(filename);
346 return err;
347}
348
349static void insert_source_line(struct rb_root *root, struct source_line *src_line)
350{
351 struct source_line *iter;
352 struct rb_node **p = &root->rb_node;
353 struct rb_node *parent = NULL;
354
355 while (*p != NULL) {
356 parent = *p;
357 iter = rb_entry(parent, struct source_line, node);
358
359 if (src_line->percent > iter->percent)
360 p = &(*p)->rb_left;
361 else
362 p = &(*p)->rb_right;
363 }
364
365 rb_link_node(&src_line->node, parent, p);
366 rb_insert_color(&src_line->node, root);
367}
368
369static void symbol__free_source_line(struct symbol *sym, int len)
370{
371 struct annotation *notes = symbol__annotation(sym);
372 struct source_line *src_line = notes->src->lines;
373 int i;
374
375 for (i = 0; i < len; i++)
376 free(src_line[i].path);
377
378 free(src_line);
379 notes->src->lines = NULL;
380}
381
382/* Get the filename:line for the colored entries */
383static int symbol__get_source_line(struct symbol *sym, struct map *map,
384 int evidx, struct rb_root *root, int len,
385 const char *filename)
386{
387 u64 start;
388 int i;
389 char cmd[PATH_MAX * 2];
390 struct source_line *src_line;
391 struct annotation *notes = symbol__annotation(sym);
392 struct sym_hist *h = annotation__histogram(notes, evidx);
393
394 if (!h->sum)
395 return 0;
396
397 src_line = notes->src->lines = calloc(len, sizeof(struct source_line));
398 if (!notes->src->lines)
399 return -1;
400
401 start = map->unmap_ip(map, sym->start);
402
403 for (i = 0; i < len; i++) {
404 char *path = NULL;
405 size_t line_len;
406 u64 offset;
407 FILE *fp;
408
409 src_line[i].percent = 100.0 * h->addr[i] / h->sum;
410 if (src_line[i].percent <= 0.5)
411 continue;
412
413 offset = start + i;
414 sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
415 fp = popen(cmd, "r");
416 if (!fp)
417 continue;
418
419 if (getline(&path, &line_len, fp) < 0 || !line_len)
420 goto next;
421
422 src_line[i].path = malloc(sizeof(char) * line_len + 1);
423 if (!src_line[i].path)
424 goto next;
425
426 strcpy(src_line[i].path, path);
427 insert_source_line(root, &src_line[i]);
428
429 next:
430 pclose(fp);
431 }
432
433 return 0;
434}
435
436static void print_summary(struct rb_root *root, const char *filename)
437{
438 struct source_line *src_line;
439 struct rb_node *node;
440
441 printf("\nSorted summary for file %s\n", filename);
442 printf("----------------------------------------------\n\n");
443
444 if (RB_EMPTY_ROOT(root)) {
445 printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
446 return;
447 }
448
449 node = rb_first(root);
450 while (node) {
451 double percent;
452 const char *color;
453 char *path;
454
455 src_line = rb_entry(node, struct source_line, node);
456 percent = src_line->percent;
457 color = get_percent_color(percent);
458 path = src_line->path;
459
460 color_fprintf(stdout, color, " %7.2f %s", percent, path);
461 node = rb_next(node);
462 }
463}
464
465static void symbol__annotate_hits(struct symbol *sym, int evidx)
466{
467 struct annotation *notes = symbol__annotation(sym);
468 struct sym_hist *h = annotation__histogram(notes, evidx);
469 u64 len = sym->end - sym->start, offset;
470
471 for (offset = 0; offset < len; ++offset)
472 if (h->addr[offset] != 0)
473 printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
474 sym->start + offset, h->addr[offset]);
475 printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
476}
477
478int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
479 bool full_paths, int min_pcnt, int max_lines,
480 int context)
481{
482 struct dso *dso = map->dso;
483 const char *filename = dso->long_name, *d_filename;
484 struct annotation *notes = symbol__annotation(sym);
485 struct objdump_line *pos, *queue = NULL;
486 int printed = 2, queue_len = 0;
487 int more = 0;
488 u64 len;
489
490 if (full_paths)
491 d_filename = filename;
492 else
493 d_filename = basename(filename);
494
495 len = sym->end - sym->start;
496
497 printf(" Percent | Source code & Disassembly of %s\n", d_filename);
498 printf("------------------------------------------------\n");
499
500 if (verbose)
501 symbol__annotate_hits(sym, evidx);
502
503 list_for_each_entry(pos, &notes->src->source, node) {
504 if (context && queue == NULL) {
505 queue = pos;
506 queue_len = 0;
507 }
508
509 switch (objdump_line__print(pos, sym, evidx, len, min_pcnt,
510 printed, max_lines, queue)) {
511 case 0:
512 ++printed;
513 if (context) {
514 printed += queue_len;
515 queue = NULL;
516 queue_len = 0;
517 }
518 break;
519 case 1:
520 /* filtered by max_lines */
521 ++more;
522 break;
523 case -1:
524 default:
525 /*
526 * Filtered by min_pcnt or non IP lines when
527 * context != 0
528 */
529 if (!context)
530 break;
531 if (queue_len == context)
532 queue = list_entry(queue->node.next, typeof(*queue), node);
533 else
534 ++queue_len;
535 break;
536 }
537 }
538
539 return more;
540}
541
542void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
543{
544 struct annotation *notes = symbol__annotation(sym);
545 struct sym_hist *h = annotation__histogram(notes, evidx);
546
547 memset(h, 0, notes->src->sizeof_sym_hist);
548}
549
550void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
551{
552 struct annotation *notes = symbol__annotation(sym);
553 struct sym_hist *h = annotation__histogram(notes, evidx);
554 struct objdump_line *pos;
555 int len = sym->end - sym->start;
556
557 h->sum = 0;
558
559 list_for_each_entry(pos, &notes->src->source, node) {
560 if (pos->offset != -1 && pos->offset < len) {
561 h->addr[pos->offset] = h->addr[pos->offset] * 7 / 8;
562 h->sum += h->addr[pos->offset];
563 }
564 }
565}
566
567void objdump_line_list__purge(struct list_head *head)
568{
569 struct objdump_line *pos, *n;
570
571 list_for_each_entry_safe(pos, n, head, node) {
572 list_del(&pos->node);
573 objdump_line__free(pos);
574 }
575}
576
577int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
578 bool print_lines, bool full_paths, int min_pcnt,
579 int max_lines)
580{
581 struct dso *dso = map->dso;
582 const char *filename = dso->long_name;
583 struct rb_root source_line = RB_ROOT;
584 u64 len;
585
586 if (symbol__annotate(sym, map, 0) < 0)
587 return -1;
588
589 len = sym->end - sym->start;
590
591 if (print_lines) {
592 symbol__get_source_line(sym, map, evidx, &source_line,
593 len, filename);
594 print_summary(&source_line, filename);
595 }
596
597 symbol__annotate_printf(sym, map, evidx, full_paths,
598 min_pcnt, max_lines, 0);
599 if (print_lines)
600 symbol__free_source_line(sym, len);
601
602 objdump_line_list__purge(&symbol__annotation(sym)->src->source);
603
604 return 0;
605}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
new file mode 100644
index 000000000000..c2c286896801
--- /dev/null
+++ b/tools/perf/util/annotate.h
@@ -0,0 +1,103 @@
1#ifndef __PERF_ANNOTATE_H
2#define __PERF_ANNOTATE_H
3
4#include <stdbool.h>
5#include "types.h"
6#include "symbol.h"
7#include <linux/list.h>
8#include <linux/rbtree.h>
9
10struct objdump_line {
11 struct list_head node;
12 s64 offset;
13 char *line;
14};
15
16void objdump_line__free(struct objdump_line *self);
17struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
18 struct objdump_line *pos);
19
20struct sym_hist {
21 u64 sum;
22 u64 addr[0];
23};
24
25struct source_line {
26 struct rb_node node;
27 double percent;
28 char *path;
29};
30
31/** struct annotated_source - symbols with hits have this attached as in sannotation
32 *
33 * @histogram: Array of addr hit histograms per event being monitored
34 * @lines: If 'print_lines' is specified, per source code line percentages
35 * @source: source parsed from objdump -dS
36 *
37 * lines is allocated, percentages calculated and all sorted by percentage
38 * when the annotation is about to be presented, so the percentages are for
39 * one of the entries in the histogram array, i.e. for the event/counter being
40 * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
41 * returns.
42 */
43struct annotated_source {
44 struct list_head source;
45 struct source_line *lines;
46 int nr_histograms;
47 int sizeof_sym_hist;
48 struct sym_hist histograms[0];
49};
50
51struct annotation {
52 pthread_mutex_t lock;
53 struct annotated_source *src;
54};
55
56struct sannotation {
57 struct annotation annotation;
58 struct symbol symbol;
59};
60
61static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
62{
63 return (((void *)&notes->src->histograms) +
64 (notes->src->sizeof_sym_hist * idx));
65}
66
67static inline struct annotation *symbol__annotation(struct symbol *sym)
68{
69 struct sannotation *a = container_of(sym, struct sannotation, symbol);
70 return &a->annotation;
71}
72
73int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
74 int evidx, u64 addr);
75int symbol__alloc_hist(struct symbol *sym, int nevents);
76void symbol__annotate_zero_histograms(struct symbol *sym);
77
78int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
79int symbol__annotate_init(struct map *map __used, struct symbol *sym);
80int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
81 bool full_paths, int min_pcnt, int max_lines,
82 int context);
83void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
84void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
85void objdump_line_list__purge(struct list_head *head);
86
87int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
88 bool print_lines, bool full_paths, int min_pcnt,
89 int max_lines);
90
91#ifdef NO_NEWT_SUPPORT
92static inline int symbol__tui_annotate(struct symbol *sym __used,
93 struct map *map __used,
94 int evidx __used, int refresh __used)
95{
96 return 0;
97}
98#else
99int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
100 int refresh);
101#endif
102
103#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index deffb8c96071..31f934af9861 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,8 +14,8 @@
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include "debug.h" 15#include "debug.h"
16 16
17static int build_id__mark_dso_hit(event_t *event, 17static int build_id__mark_dso_hit(union perf_event *event,
18 struct sample_data *sample __used, 18 struct perf_sample *sample __used,
19 struct perf_session *session) 19 struct perf_session *session)
20{ 20{
21 struct addr_location al; 21 struct addr_location al;
@@ -37,13 +37,14 @@ static int build_id__mark_dso_hit(event_t *event,
37 return 0; 37 return 0;
38} 38}
39 39
40static int event__exit_del_thread(event_t *self, struct sample_data *sample __used, 40static int perf_event__exit_del_thread(union perf_event *event,
41 struct perf_session *session) 41 struct perf_sample *sample __used,
42 struct perf_session *session)
42{ 43{
43 struct thread *thread = perf_session__findnew(session, self->fork.tid); 44 struct thread *thread = perf_session__findnew(session, event->fork.tid);
44 45
45 dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid, 46 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
46 self->fork.ppid, self->fork.ptid); 47 event->fork.ppid, event->fork.ptid);
47 48
48 if (thread) { 49 if (thread) {
49 rb_erase(&thread->rb_node, &session->threads); 50 rb_erase(&thread->rb_node, &session->threads);
@@ -56,9 +57,9 @@ static int event__exit_del_thread(event_t *self, struct sample_data *sample __us
56 57
57struct perf_event_ops build_id__mark_dso_hit_ops = { 58struct perf_event_ops build_id__mark_dso_hit_ops = {
58 .sample = build_id__mark_dso_hit, 59 .sample = build_id__mark_dso_hit,
59 .mmap = event__process_mmap, 60 .mmap = perf_event__process_mmap,
60 .fork = event__process_task, 61 .fork = perf_event__process_task,
61 .exit = event__exit_del_thread, 62 .exit = perf_event__exit_del_thread,
62}; 63};
63 64
64char *dso__build_id_filename(struct dso *self, char *bf, size_t size) 65char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index a7729797fd96..fc5e5a09d5b9 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -34,13 +34,14 @@ extern int pager_use_color;
34extern int use_browser; 34extern int use_browser;
35 35
36#ifdef NO_NEWT_SUPPORT 36#ifdef NO_NEWT_SUPPORT
37static inline void setup_browser(void) 37static inline void setup_browser(bool fallback_to_pager)
38{ 38{
39 setup_pager(); 39 if (fallback_to_pager)
40 setup_pager();
40} 41}
41static inline void exit_browser(bool wait_for_ok __used) {} 42static inline void exit_browser(bool wait_for_ok __used) {}
42#else 43#else
43void setup_browser(void); 44void setup_browser(bool fallback_to_pager);
44void exit_browser(bool wait_for_ok); 45void exit_browser(bool wait_for_ok);
45#endif 46#endif
46 47
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index e12d539417b2..9f7106a8d9a4 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009-2010, Frederic Weisbecker <fweisbec@gmail.com> 2 * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
3 * 3 *
4 * Handle the callchains from the stream in an ad-hoc radix tree and then 4 * Handle the callchains from the stream in an ad-hoc radix tree and then
5 * sort them in an rbtree. 5 * sort them in an rbtree.
@@ -18,7 +18,8 @@
18#include "util.h" 18#include "util.h"
19#include "callchain.h" 19#include "callchain.h"
20 20
21bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event) 21bool ip_callchain__valid(struct ip_callchain *chain,
22 const union perf_event *event)
22{ 23{
23 unsigned int chain_size = event->header.size; 24 unsigned int chain_size = event->header.size;
24 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; 25 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
@@ -26,10 +27,10 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
26} 27}
27 28
28#define chain_for_each_child(child, parent) \ 29#define chain_for_each_child(child, parent) \
29 list_for_each_entry(child, &parent->children, brothers) 30 list_for_each_entry(child, &parent->children, siblings)
30 31
31#define chain_for_each_child_safe(child, next, parent) \ 32#define chain_for_each_child_safe(child, next, parent) \
32 list_for_each_entry_safe(child, next, &parent->children, brothers) 33 list_for_each_entry_safe(child, next, &parent->children, siblings)
33 34
34static void 35static void
35rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, 36rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
@@ -38,14 +39,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
38 struct rb_node **p = &root->rb_node; 39 struct rb_node **p = &root->rb_node;
39 struct rb_node *parent = NULL; 40 struct rb_node *parent = NULL;
40 struct callchain_node *rnode; 41 struct callchain_node *rnode;
41 u64 chain_cumul = cumul_hits(chain); 42 u64 chain_cumul = callchain_cumul_hits(chain);
42 43
43 while (*p) { 44 while (*p) {
44 u64 rnode_cumul; 45 u64 rnode_cumul;
45 46
46 parent = *p; 47 parent = *p;
47 rnode = rb_entry(parent, struct callchain_node, rb_node); 48 rnode = rb_entry(parent, struct callchain_node, rb_node);
48 rnode_cumul = cumul_hits(rnode); 49 rnode_cumul = callchain_cumul_hits(rnode);
49 50
50 switch (mode) { 51 switch (mode) {
51 case CHAIN_FLAT: 52 case CHAIN_FLAT:
@@ -104,7 +105,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
104 105
105 chain_for_each_child(child, node) { 106 chain_for_each_child(child, node) {
106 __sort_chain_graph_abs(child, min_hit); 107 __sort_chain_graph_abs(child, min_hit);
107 if (cumul_hits(child) >= min_hit) 108 if (callchain_cumul_hits(child) >= min_hit)
108 rb_insert_callchain(&node->rb_root, child, 109 rb_insert_callchain(&node->rb_root, child,
109 CHAIN_GRAPH_ABS); 110 CHAIN_GRAPH_ABS);
110 } 111 }
@@ -129,7 +130,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
129 130
130 chain_for_each_child(child, node) { 131 chain_for_each_child(child, node) {
131 __sort_chain_graph_rel(child, min_percent); 132 __sort_chain_graph_rel(child, min_percent);
132 if (cumul_hits(child) >= min_hit) 133 if (callchain_cumul_hits(child) >= min_hit)
133 rb_insert_callchain(&node->rb_root, child, 134 rb_insert_callchain(&node->rb_root, child,
134 CHAIN_GRAPH_REL); 135 CHAIN_GRAPH_REL);
135 } 136 }
@@ -143,7 +144,7 @@ sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
143 rb_root->rb_node = chain_root->node.rb_root.rb_node; 144 rb_root->rb_node = chain_root->node.rb_root.rb_node;
144} 145}
145 146
146int register_callchain_param(struct callchain_param *param) 147int callchain_register_param(struct callchain_param *param)
147{ 148{
148 switch (param->mode) { 149 switch (param->mode) {
149 case CHAIN_GRAPH_ABS: 150 case CHAIN_GRAPH_ABS:
@@ -189,32 +190,27 @@ create_child(struct callchain_node *parent, bool inherit_children)
189 chain_for_each_child(next, new) 190 chain_for_each_child(next, new)
190 next->parent = new; 191 next->parent = new;
191 } 192 }
192 list_add_tail(&new->brothers, &parent->children); 193 list_add_tail(&new->siblings, &parent->children);
193 194
194 return new; 195 return new;
195} 196}
196 197
197 198
198struct resolved_ip {
199 u64 ip;
200 struct map_symbol ms;
201};
202
203struct resolved_chain {
204 u64 nr;
205 struct resolved_ip ips[0];
206};
207
208
209/* 199/*
210 * Fill the node with callchain values 200 * Fill the node with callchain values
211 */ 201 */
212static void 202static void
213fill_node(struct callchain_node *node, struct resolved_chain *chain, int start) 203fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
214{ 204{
215 unsigned int i; 205 struct callchain_cursor_node *cursor_node;
206
207 node->val_nr = cursor->nr - cursor->pos;
208 if (!node->val_nr)
209 pr_warning("Warning: empty node in callchain tree\n");
216 210
217 for (i = start; i < chain->nr; i++) { 211 cursor_node = callchain_cursor_current(cursor);
212
213 while (cursor_node) {
218 struct callchain_list *call; 214 struct callchain_list *call;
219 215
220 call = zalloc(sizeof(*call)); 216 call = zalloc(sizeof(*call));
@@ -222,23 +218,25 @@ fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
222 perror("not enough memory for the code path tree"); 218 perror("not enough memory for the code path tree");
223 return; 219 return;
224 } 220 }
225 call->ip = chain->ips[i].ip; 221 call->ip = cursor_node->ip;
226 call->ms = chain->ips[i].ms; 222 call->ms.sym = cursor_node->sym;
223 call->ms.map = cursor_node->map;
227 list_add_tail(&call->list, &node->val); 224 list_add_tail(&call->list, &node->val);
225
226 callchain_cursor_advance(cursor);
227 cursor_node = callchain_cursor_current(cursor);
228 } 228 }
229 node->val_nr = chain->nr - start;
230 if (!node->val_nr)
231 pr_warning("Warning: empty node in callchain tree\n");
232} 229}
233 230
234static void 231static void
235add_child(struct callchain_node *parent, struct resolved_chain *chain, 232add_child(struct callchain_node *parent,
236 int start, u64 period) 233 struct callchain_cursor *cursor,
234 u64 period)
237{ 235{
238 struct callchain_node *new; 236 struct callchain_node *new;
239 237
240 new = create_child(parent, false); 238 new = create_child(parent, false);
241 fill_node(new, chain, start); 239 fill_node(new, cursor);
242 240
243 new->children_hit = 0; 241 new->children_hit = 0;
244 new->hit = period; 242 new->hit = period;
@@ -250,9 +248,10 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain,
250 * Then create another child to host the given callchain of new branch 248 * Then create another child to host the given callchain of new branch
251 */ 249 */
252static void 250static void
253split_add_child(struct callchain_node *parent, struct resolved_chain *chain, 251split_add_child(struct callchain_node *parent,
254 struct callchain_list *to_split, int idx_parents, int idx_local, 252 struct callchain_cursor *cursor,
255 u64 period) 253 struct callchain_list *to_split,
254 u64 idx_parents, u64 idx_local, u64 period)
256{ 255{
257 struct callchain_node *new; 256 struct callchain_node *new;
258 struct list_head *old_tail; 257 struct list_head *old_tail;
@@ -272,14 +271,14 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
272 /* split the hits */ 271 /* split the hits */
273 new->hit = parent->hit; 272 new->hit = parent->hit;
274 new->children_hit = parent->children_hit; 273 new->children_hit = parent->children_hit;
275 parent->children_hit = cumul_hits(new); 274 parent->children_hit = callchain_cumul_hits(new);
276 new->val_nr = parent->val_nr - idx_local; 275 new->val_nr = parent->val_nr - idx_local;
277 parent->val_nr = idx_local; 276 parent->val_nr = idx_local;
278 277
279 /* create a new child for the new branch if any */ 278 /* create a new child for the new branch if any */
280 if (idx_total < chain->nr) { 279 if (idx_total < cursor->nr) {
281 parent->hit = 0; 280 parent->hit = 0;
282 add_child(parent, chain, idx_total, period); 281 add_child(parent, cursor, period);
283 parent->children_hit += period; 282 parent->children_hit += period;
284 } else { 283 } else {
285 parent->hit = period; 284 parent->hit = period;
@@ -287,36 +286,41 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
287} 286}
288 287
289static int 288static int
290append_chain(struct callchain_node *root, struct resolved_chain *chain, 289append_chain(struct callchain_node *root,
291 unsigned int start, u64 period); 290 struct callchain_cursor *cursor,
291 u64 period);
292 292
293static void 293static void
294append_chain_children(struct callchain_node *root, struct resolved_chain *chain, 294append_chain_children(struct callchain_node *root,
295 unsigned int start, u64 period) 295 struct callchain_cursor *cursor,
296 u64 period)
296{ 297{
297 struct callchain_node *rnode; 298 struct callchain_node *rnode;
298 299
299 /* lookup in childrens */ 300 /* lookup in childrens */
300 chain_for_each_child(rnode, root) { 301 chain_for_each_child(rnode, root) {
301 unsigned int ret = append_chain(rnode, chain, start, period); 302 unsigned int ret = append_chain(rnode, cursor, period);
302 303
303 if (!ret) 304 if (!ret)
304 goto inc_children_hit; 305 goto inc_children_hit;
305 } 306 }
306 /* nothing in children, add to the current node */ 307 /* nothing in children, add to the current node */
307 add_child(root, chain, start, period); 308 add_child(root, cursor, period);
308 309
309inc_children_hit: 310inc_children_hit:
310 root->children_hit += period; 311 root->children_hit += period;
311} 312}
312 313
313static int 314static int
314append_chain(struct callchain_node *root, struct resolved_chain *chain, 315append_chain(struct callchain_node *root,
315 unsigned int start, u64 period) 316 struct callchain_cursor *cursor,
317 u64 period)
316{ 318{
319 struct callchain_cursor_node *curr_snap = cursor->curr;
317 struct callchain_list *cnode; 320 struct callchain_list *cnode;
318 unsigned int i = start; 321 u64 start = cursor->pos;
319 bool found = false; 322 bool found = false;
323 u64 matches;
320 324
321 /* 325 /*
322 * Lookup in the current node 326 * Lookup in the current node
@@ -324,141 +328,134 @@ append_chain(struct callchain_node *root, struct resolved_chain *chain,
324 * anywhere inside a function. 328 * anywhere inside a function.
325 */ 329 */
326 list_for_each_entry(cnode, &root->val, list) { 330 list_for_each_entry(cnode, &root->val, list) {
331 struct callchain_cursor_node *node;
327 struct symbol *sym; 332 struct symbol *sym;
328 333
329 if (i == chain->nr) 334 node = callchain_cursor_current(cursor);
335 if (!node)
330 break; 336 break;
331 337
332 sym = chain->ips[i].ms.sym; 338 sym = node->sym;
333 339
334 if (cnode->ms.sym && sym) { 340 if (cnode->ms.sym && sym) {
335 if (cnode->ms.sym->start != sym->start) 341 if (cnode->ms.sym->start != sym->start)
336 break; 342 break;
337 } else if (cnode->ip != chain->ips[i].ip) 343 } else if (cnode->ip != node->ip)
338 break; 344 break;
339 345
340 if (!found) 346 if (!found)
341 found = true; 347 found = true;
342 i++; 348
349 callchain_cursor_advance(cursor);
343 } 350 }
344 351
345 /* matches not, relay on the parent */ 352 /* matches not, relay on the parent */
346 if (!found) 353 if (!found) {
354 cursor->curr = curr_snap;
355 cursor->pos = start;
347 return -1; 356 return -1;
357 }
358
359 matches = cursor->pos - start;
348 360
349 /* we match only a part of the node. Split it and add the new chain */ 361 /* we match only a part of the node. Split it and add the new chain */
350 if (i - start < root->val_nr) { 362 if (matches < root->val_nr) {
351 split_add_child(root, chain, cnode, start, i - start, period); 363 split_add_child(root, cursor, cnode, start, matches, period);
352 return 0; 364 return 0;
353 } 365 }
354 366
355 /* we match 100% of the path, increment the hit */ 367 /* we match 100% of the path, increment the hit */
356 if (i - start == root->val_nr && i == chain->nr) { 368 if (matches == root->val_nr && cursor->pos == cursor->nr) {
357 root->hit += period; 369 root->hit += period;
358 return 0; 370 return 0;
359 } 371 }
360 372
361 /* We match the node and still have a part remaining */ 373 /* We match the node and still have a part remaining */
362 append_chain_children(root, chain, i, period); 374 append_chain_children(root, cursor, period);
363 375
364 return 0; 376 return 0;
365} 377}
366 378
367static void filter_context(struct ip_callchain *old, struct resolved_chain *new, 379int callchain_append(struct callchain_root *root,
368 struct map_symbol *syms) 380 struct callchain_cursor *cursor,
369{ 381 u64 period)
370 int i, j = 0;
371
372 for (i = 0; i < (int)old->nr; i++) {
373 if (old->ips[i] >= PERF_CONTEXT_MAX)
374 continue;
375
376 new->ips[j].ip = old->ips[i];
377 new->ips[j].ms = syms[i];
378 j++;
379 }
380
381 new->nr = j;
382}
383
384
385int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
386 struct map_symbol *syms, u64 period)
387{ 382{
388 struct resolved_chain *filtered; 383 if (!cursor->nr)
389
390 if (!chain->nr)
391 return 0; 384 return 0;
392 385
393 filtered = zalloc(sizeof(*filtered) + 386 callchain_cursor_commit(cursor);
394 chain->nr * sizeof(struct resolved_ip));
395 if (!filtered)
396 return -ENOMEM;
397
398 filter_context(chain, filtered, syms);
399
400 if (!filtered->nr)
401 goto end;
402 387
403 append_chain_children(&root->node, filtered, 0, period); 388 append_chain_children(&root->node, cursor, period);
404 389
405 if (filtered->nr > root->max_depth) 390 if (cursor->nr > root->max_depth)
406 root->max_depth = filtered->nr; 391 root->max_depth = cursor->nr;
407end:
408 free(filtered);
409 392
410 return 0; 393 return 0;
411} 394}
412 395
413static int 396static int
414merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, 397merge_chain_branch(struct callchain_cursor *cursor,
415 struct resolved_chain *chain) 398 struct callchain_node *dst, struct callchain_node *src)
416{ 399{
400 struct callchain_cursor_node **old_last = cursor->last;
417 struct callchain_node *child, *next_child; 401 struct callchain_node *child, *next_child;
418 struct callchain_list *list, *next_list; 402 struct callchain_list *list, *next_list;
419 int old_pos = chain->nr; 403 int old_pos = cursor->nr;
420 int err = 0; 404 int err = 0;
421 405
422 list_for_each_entry_safe(list, next_list, &src->val, list) { 406 list_for_each_entry_safe(list, next_list, &src->val, list) {
423 chain->ips[chain->nr].ip = list->ip; 407 callchain_cursor_append(cursor, list->ip,
424 chain->ips[chain->nr].ms = list->ms; 408 list->ms.map, list->ms.sym);
425 chain->nr++;
426 list_del(&list->list); 409 list_del(&list->list);
427 free(list); 410 free(list);
428 } 411 }
429 412
430 if (src->hit) 413 if (src->hit) {
431 append_chain_children(dst, chain, 0, src->hit); 414 callchain_cursor_commit(cursor);
415 append_chain_children(dst, cursor, src->hit);
416 }
432 417
433 chain_for_each_child_safe(child, next_child, src) { 418 chain_for_each_child_safe(child, next_child, src) {
434 err = merge_chain_branch(dst, child, chain); 419 err = merge_chain_branch(cursor, dst, child);
435 if (err) 420 if (err)
436 break; 421 break;
437 422
438 list_del(&child->brothers); 423 list_del(&child->siblings);
439 free(child); 424 free(child);
440 } 425 }
441 426
442 chain->nr = old_pos; 427 cursor->nr = old_pos;
428 cursor->last = old_last;
443 429
444 return err; 430 return err;
445} 431}
446 432
447int callchain_merge(struct callchain_root *dst, struct callchain_root *src) 433int callchain_merge(struct callchain_cursor *cursor,
434 struct callchain_root *dst, struct callchain_root *src)
435{
436 return merge_chain_branch(cursor, &dst->node, &src->node);
437}
438
439int callchain_cursor_append(struct callchain_cursor *cursor,
440 u64 ip, struct map *map, struct symbol *sym)
448{ 441{
449 struct resolved_chain *chain; 442 struct callchain_cursor_node *node = *cursor->last;
450 int err;
451 443
452 chain = malloc(sizeof(*chain) + 444 if (!node) {
453 src->max_depth * sizeof(struct resolved_ip)); 445 node = calloc(sizeof(*node), 1);
454 if (!chain) 446 if (!node)
455 return -ENOMEM; 447 return -ENOMEM;
456 448
457 chain->nr = 0; 449 *cursor->last = node;
450 }
458 451
459 err = merge_chain_branch(&dst->node, &src->node, chain); 452 node->ip = ip;
453 node->map = map;
454 node->sym = sym;
460 455
461 free(chain); 456 cursor->nr++;
462 457
463 return err; 458 cursor->last = &node->next;
459
460 return 0;
464} 461}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index c15fb8c24ad2..1a79df9f739f 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -16,7 +16,7 @@ enum chain_mode {
16 16
17struct callchain_node { 17struct callchain_node {
18 struct callchain_node *parent; 18 struct callchain_node *parent;
19 struct list_head brothers; 19 struct list_head siblings;
20 struct list_head children; 20 struct list_head children;
21 struct list_head val; 21 struct list_head val;
22 struct rb_node rb_node; /* to sort nodes in an rbtree */ 22 struct rb_node rb_node; /* to sort nodes in an rbtree */
@@ -49,9 +49,30 @@ struct callchain_list {
49 struct list_head list; 49 struct list_head list;
50}; 50};
51 51
52/*
53 * A callchain cursor is a single linked list that
54 * let one feed a callchain progressively.
55 * It keeps persitent allocated entries to minimize
56 * allocations.
57 */
58struct callchain_cursor_node {
59 u64 ip;
60 struct map *map;
61 struct symbol *sym;
62 struct callchain_cursor_node *next;
63};
64
65struct callchain_cursor {
66 u64 nr;
67 struct callchain_cursor_node *first;
68 struct callchain_cursor_node **last;
69 u64 pos;
70 struct callchain_cursor_node *curr;
71};
72
52static inline void callchain_init(struct callchain_root *root) 73static inline void callchain_init(struct callchain_root *root)
53{ 74{
54 INIT_LIST_HEAD(&root->node.brothers); 75 INIT_LIST_HEAD(&root->node.siblings);
55 INIT_LIST_HEAD(&root->node.children); 76 INIT_LIST_HEAD(&root->node.children);
56 INIT_LIST_HEAD(&root->node.val); 77 INIT_LIST_HEAD(&root->node.val);
57 78
@@ -61,15 +82,54 @@ static inline void callchain_init(struct callchain_root *root)
61 root->max_depth = 0; 82 root->max_depth = 0;
62} 83}
63 84
64static inline u64 cumul_hits(struct callchain_node *node) 85static inline u64 callchain_cumul_hits(struct callchain_node *node)
65{ 86{
66 return node->hit + node->children_hit; 87 return node->hit + node->children_hit;
67} 88}
68 89
69int register_callchain_param(struct callchain_param *param); 90int callchain_register_param(struct callchain_param *param);
70int callchain_append(struct callchain_root *root, struct ip_callchain *chain, 91int callchain_append(struct callchain_root *root,
71 struct map_symbol *syms, u64 period); 92 struct callchain_cursor *cursor,
72int callchain_merge(struct callchain_root *dst, struct callchain_root *src); 93 u64 period);
94
95int callchain_merge(struct callchain_cursor *cursor,
96 struct callchain_root *dst, struct callchain_root *src);
97
98bool ip_callchain__valid(struct ip_callchain *chain,
99 const union perf_event *event);
100/*
101 * Initialize a cursor before adding entries inside, but keep
102 * the previously allocated entries as a cache.
103 */
104static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
105{
106 cursor->nr = 0;
107 cursor->last = &cursor->first;
108}
109
110int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
111 struct map *map, struct symbol *sym);
73 112
74bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); 113/* Close a cursor writing session. Initialize for the reader */
114static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
115{
116 cursor->curr = cursor->first;
117 cursor->pos = 0;
118}
119
120/* Cursor reading iteration helpers */
121static inline struct callchain_cursor_node *
122callchain_cursor_current(struct callchain_cursor *cursor)
123{
124 if (cursor->pos == cursor->nr)
125 return NULL;
126
127 return cursor->curr;
128}
129
130static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
131{
132 cursor->curr = cursor->curr->next;
133 cursor->pos++;
134}
75#endif /* __PERF_CALLCHAIN_H */ 135#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
new file mode 100644
index 000000000000..9fea75535221
--- /dev/null
+++ b/tools/perf/util/cgroup.c
@@ -0,0 +1,178 @@
1#include "util.h"
2#include "../perf.h"
3#include "parse-options.h"
4#include "evsel.h"
5#include "cgroup.h"
6#include "debugfs.h" /* MAX_PATH, STR() */
7#include "evlist.h"
8
9int nr_cgroups;
10
11static int
12cgroupfs_find_mountpoint(char *buf, size_t maxlen)
13{
14 FILE *fp;
15 char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1];
16 char *token, *saved_ptr;
17 int found = 0;
18
19 fp = fopen("/proc/mounts", "r");
20 if (!fp)
21 return -1;
22
23 /*
24 * in order to handle split hierarchy, we need to scan /proc/mounts
25 * and inspect every cgroupfs mount point to find one that has
26 * perf_event subsystem
27 */
28 while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %"
29 STR(MAX_PATH)"s %*d %*d\n",
30 mountpoint, type, tokens) == 3) {
31
32 if (!strcmp(type, "cgroup")) {
33
34 token = strtok_r(tokens, ",", &saved_ptr);
35
36 while (token != NULL) {
37 if (!strcmp(token, "perf_event")) {
38 found = 1;
39 break;
40 }
41 token = strtok_r(NULL, ",", &saved_ptr);
42 }
43 }
44 if (found)
45 break;
46 }
47 fclose(fp);
48 if (!found)
49 return -1;
50
51 if (strlen(mountpoint) < maxlen) {
52 strcpy(buf, mountpoint);
53 return 0;
54 }
55 return -1;
56}
57
58static int open_cgroup(char *name)
59{
60 char path[MAX_PATH+1];
61 char mnt[MAX_PATH+1];
62 int fd;
63
64
65 if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1))
66 return -1;
67
68 snprintf(path, MAX_PATH, "%s/%s", mnt, name);
69
70 fd = open(path, O_RDONLY);
71 if (fd == -1)
72 fprintf(stderr, "no access to cgroup %s\n", path);
73
74 return fd;
75}
76
77static int add_cgroup(struct perf_evlist *evlist, char *str)
78{
79 struct perf_evsel *counter;
80 struct cgroup_sel *cgrp = NULL;
81 int n;
82 /*
83 * check if cgrp is already defined, if so we reuse it
84 */
85 list_for_each_entry(counter, &evlist->entries, node) {
86 cgrp = counter->cgrp;
87 if (!cgrp)
88 continue;
89 if (!strcmp(cgrp->name, str))
90 break;
91
92 cgrp = NULL;
93 }
94
95 if (!cgrp) {
96 cgrp = zalloc(sizeof(*cgrp));
97 if (!cgrp)
98 return -1;
99
100 cgrp->name = str;
101
102 cgrp->fd = open_cgroup(str);
103 if (cgrp->fd == -1) {
104 free(cgrp);
105 return -1;
106 }
107 }
108
109 /*
110 * find corresponding event
111 * if add cgroup N, then need to find event N
112 */
113 n = 0;
114 list_for_each_entry(counter, &evlist->entries, node) {
115 if (n == nr_cgroups)
116 goto found;
117 n++;
118 }
119 if (cgrp->refcnt == 0)
120 free(cgrp);
121
122 return -1;
123found:
124 cgrp->refcnt++;
125 counter->cgrp = cgrp;
126 return 0;
127}
128
129void close_cgroup(struct cgroup_sel *cgrp)
130{
131 if (!cgrp)
132 return;
133
134 /* XXX: not reentrant */
135 if (--cgrp->refcnt == 0) {
136 close(cgrp->fd);
137 free(cgrp->name);
138 free(cgrp);
139 }
140}
141
142int parse_cgroups(const struct option *opt __used, const char *str,
143 int unset __used)
144{
145 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
146 const char *p, *e, *eos = str + strlen(str);
147 char *s;
148 int ret;
149
150 if (list_empty(&evlist->entries)) {
151 fprintf(stderr, "must define events before cgroups\n");
152 return -1;
153 }
154
155 for (;;) {
156 p = strchr(str, ',');
157 e = p ? p : eos;
158
159 /* allow empty cgroups, i.e., skip */
160 if (e - str) {
161 /* termination added */
162 s = strndup(str, e - str);
163 if (!s)
164 return -1;
165 ret = add_cgroup(evlist, s);
166 if (ret) {
167 free(s);
168 return -1;
169 }
170 }
171 /* nr_cgroups is increased een for empty cgroups */
172 nr_cgroups++;
173 if (!p)
174 break;
175 str = p+1;
176 }
177 return 0;
178}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
new file mode 100644
index 000000000000..89acd6debdc5
--- /dev/null
+++ b/tools/perf/util/cgroup.h
@@ -0,0 +1,17 @@
1#ifndef __CGROUP_H__
2#define __CGROUP_H__
3
4struct option;
5
6struct cgroup_sel {
7 char *name;
8 int fd;
9 int refcnt;
10};
11
12
13extern int nr_cgroups; /* number of explicit cgroups defined */
14extern void close_cgroup(struct cgroup_sel *cgrp);
15extern int parse_cgroups(const struct option *opt, const char *str, int unset);
16
17#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 3ccaa1043383..6893eec693ab 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -177,3 +177,8 @@ struct cpu_map *cpu_map__dummy_new(void)
177 177
178 return cpus; 178 return cpus;
179} 179}
180
181void cpu_map__delete(struct cpu_map *map)
182{
183 free(map);
184}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f7a4f42f6307..072c0a374794 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -8,6 +8,6 @@ struct cpu_map {
8 8
9struct cpu_map *cpu_map__new(const char *cpu_list); 9struct cpu_map *cpu_map__new(const char *cpu_list);
10struct cpu_map *cpu_map__dummy_new(void); 10struct cpu_map *cpu_map__dummy_new(void);
11void *cpu_map__delete(struct cpu_map *map); 11void cpu_map__delete(struct cpu_map *map);
12 12
13#endif /* __PERF_CPUMAP_H */ 13#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 01bbe8ecec3f..d4536a9e0d8c 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -57,7 +57,7 @@ void ui__warning(const char *format, ...)
57} 57}
58#endif 58#endif
59 59
60void trace_event(event_t *event) 60void trace_event(union perf_event *event)
61{ 61{
62 unsigned char *raw_event = (void *)event; 62 unsigned char *raw_event = (void *)event;
63 const char *color = PERF_COLOR_BLUE; 63 const char *color = PERF_COLOR_BLUE;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index ca35fd66b5df..93516cf4682c 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -9,7 +9,7 @@ extern int verbose;
9extern bool quiet, dump_trace; 9extern bool quiet, dump_trace;
10 10
11int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); 11int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
12void trace_event(event_t *event); 12void trace_event(union perf_event *event);
13 13
14struct ui_progress; 14struct ui_progress;
15 15
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 50d0a931497a..2b15c362ef56 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -6,8 +6,9 @@
6#include "string.h" 6#include "string.h"
7#include "strlist.h" 7#include "strlist.h"
8#include "thread.h" 8#include "thread.h"
9#include "thread_map.h"
9 10
10static const char *event__name[] = { 11static const char *perf_event__names[] = {
11 [0] = "TOTAL", 12 [0] = "TOTAL",
12 [PERF_RECORD_MMAP] = "MMAP", 13 [PERF_RECORD_MMAP] = "MMAP",
13 [PERF_RECORD_LOST] = "LOST", 14 [PERF_RECORD_LOST] = "LOST",
@@ -25,16 +26,16 @@ static const char *event__name[] = {
25 [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", 26 [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
26}; 27};
27 28
28const char *event__get_event_name(unsigned int id) 29const char *perf_event__name(unsigned int id)
29{ 30{
30 if (id >= ARRAY_SIZE(event__name)) 31 if (id >= ARRAY_SIZE(perf_event__names))
31 return "INVALID"; 32 return "INVALID";
32 if (!event__name[id]) 33 if (!perf_event__names[id])
33 return "UNKNOWN"; 34 return "UNKNOWN";
34 return event__name[id]; 35 return perf_event__names[id];
35} 36}
36 37
37static struct sample_data synth_sample = { 38static struct perf_sample synth_sample = {
38 .pid = -1, 39 .pid = -1,
39 .tid = -1, 40 .tid = -1,
40 .time = -1, 41 .time = -1,
@@ -43,9 +44,9 @@ static struct sample_data synth_sample = {
43 .period = 1, 44 .period = 1,
44}; 45};
45 46
46static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full, 47static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid,
47 event__handler_t process, 48 int full, perf_event__handler_t process,
48 struct perf_session *session) 49 struct perf_session *session)
49{ 50{
50 char filename[PATH_MAX]; 51 char filename[PATH_MAX];
51 char bf[BUFSIZ]; 52 char bf[BUFSIZ];
@@ -126,9 +127,10 @@ out:
126 return tgid; 127 return tgid;
127} 128}
128 129
129static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid, 130static int perf_event__synthesize_mmap_events(union perf_event *event,
130 event__handler_t process, 131 pid_t pid, pid_t tgid,
131 struct perf_session *session) 132 perf_event__handler_t process,
133 struct perf_session *session)
132{ 134{
133 char filename[PATH_MAX]; 135 char filename[PATH_MAX];
134 FILE *fp; 136 FILE *fp;
@@ -199,14 +201,14 @@ static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
199 return 0; 201 return 0;
200} 202}
201 203
202int event__synthesize_modules(event__handler_t process, 204int perf_event__synthesize_modules(perf_event__handler_t process,
203 struct perf_session *session, 205 struct perf_session *session,
204 struct machine *machine) 206 struct machine *machine)
205{ 207{
206 struct rb_node *nd; 208 struct rb_node *nd;
207 struct map_groups *kmaps = &machine->kmaps; 209 struct map_groups *kmaps = &machine->kmaps;
208 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); 210 union perf_event *event = zalloc((sizeof(event->mmap) +
209 211 session->id_hdr_size));
210 if (event == NULL) { 212 if (event == NULL) {
211 pr_debug("Not enough memory synthesizing mmap event " 213 pr_debug("Not enough memory synthesizing mmap event "
212 "for kernel modules\n"); 214 "for kernel modules\n");
@@ -251,23 +253,24 @@ int event__synthesize_modules(event__handler_t process,
251 return 0; 253 return 0;
252} 254}
253 255
254static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event, 256static int __event__synthesize_thread(union perf_event *comm_event,
255 pid_t pid, event__handler_t process, 257 union perf_event *mmap_event,
258 pid_t pid, perf_event__handler_t process,
256 struct perf_session *session) 259 struct perf_session *session)
257{ 260{
258 pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process, 261 pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process,
259 session); 262 session);
260 if (tgid == -1) 263 if (tgid == -1)
261 return -1; 264 return -1;
262 return event__synthesize_mmap_events(mmap_event, pid, tgid, 265 return perf_event__synthesize_mmap_events(mmap_event, pid, tgid,
263 process, session); 266 process, session);
264} 267}
265 268
266int event__synthesize_thread_map(struct thread_map *threads, 269int perf_event__synthesize_thread_map(struct thread_map *threads,
267 event__handler_t process, 270 perf_event__handler_t process,
268 struct perf_session *session) 271 struct perf_session *session)
269{ 272{
270 event_t *comm_event, *mmap_event; 273 union perf_event *comm_event, *mmap_event;
271 int err = -1, thread; 274 int err = -1, thread;
272 275
273 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); 276 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
@@ -294,12 +297,12 @@ out:
294 return err; 297 return err;
295} 298}
296 299
297int event__synthesize_threads(event__handler_t process, 300int perf_event__synthesize_threads(perf_event__handler_t process,
298 struct perf_session *session) 301 struct perf_session *session)
299{ 302{
300 DIR *proc; 303 DIR *proc;
301 struct dirent dirent, *next; 304 struct dirent dirent, *next;
302 event_t *comm_event, *mmap_event; 305 union perf_event *comm_event, *mmap_event;
303 int err = -1; 306 int err = -1;
304 307
305 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size); 308 comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
@@ -357,10 +360,10 @@ static int find_symbol_cb(void *arg, const char *name, char type,
357 return 1; 360 return 1;
358} 361}
359 362
360int event__synthesize_kernel_mmap(event__handler_t process, 363int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
361 struct perf_session *session, 364 struct perf_session *session,
362 struct machine *machine, 365 struct machine *machine,
363 const char *symbol_name) 366 const char *symbol_name)
364{ 367{
365 size_t size; 368 size_t size;
366 const char *filename, *mmap_name; 369 const char *filename, *mmap_name;
@@ -374,8 +377,8 @@ int event__synthesize_kernel_mmap(event__handler_t process,
374 * kernels. 377 * kernels.
375 */ 378 */
376 struct process_symbol_args args = { .name = symbol_name, }; 379 struct process_symbol_args args = { .name = symbol_name, };
377 event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size); 380 union perf_event *event = zalloc((sizeof(event->mmap) +
378 381 session->id_hdr_size));
379 if (event == NULL) { 382 if (event == NULL) {
380 pr_debug("Not enough memory synthesizing mmap event " 383 pr_debug("Not enough memory synthesizing mmap event "
381 "for kernel modules\n"); 384 "for kernel modules\n");
@@ -421,42 +424,15 @@ int event__synthesize_kernel_mmap(event__handler_t process,
421 return err; 424 return err;
422} 425}
423 426
424static void thread__comm_adjust(struct thread *self, struct hists *hists) 427int perf_event__process_comm(union perf_event *event,
425{ 428 struct perf_sample *sample __used,
426 char *comm = self->comm; 429 struct perf_session *session)
427
428 if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
429 (!symbol_conf.comm_list ||
430 strlist__has_entry(symbol_conf.comm_list, comm))) {
431 u16 slen = strlen(comm);
432
433 if (hists__new_col_len(hists, HISTC_COMM, slen))
434 hists__set_col_len(hists, HISTC_THREAD, slen + 6);
435 }
436}
437
438static int thread__set_comm_adjust(struct thread *self, const char *comm,
439 struct hists *hists)
440{ 430{
441 int ret = thread__set_comm(self, comm); 431 struct thread *thread = perf_session__findnew(session, event->comm.tid);
442
443 if (ret)
444 return ret;
445
446 thread__comm_adjust(self, hists);
447 432
448 return 0; 433 dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid);
449}
450 434
451int event__process_comm(event_t *self, struct sample_data *sample __used, 435 if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
452 struct perf_session *session)
453{
454 struct thread *thread = perf_session__findnew(session, self->comm.tid);
455
456 dump_printf(": %s:%d\n", self->comm.comm, self->comm.tid);
457
458 if (thread == NULL || thread__set_comm_adjust(thread, self->comm.comm,
459 &session->hists)) {
460 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); 436 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
461 return -1; 437 return -1;
462 } 438 }
@@ -464,19 +440,21 @@ int event__process_comm(event_t *self, struct sample_data *sample __used,
464 return 0; 440 return 0;
465} 441}
466 442
467int event__process_lost(event_t *self, struct sample_data *sample __used, 443int perf_event__process_lost(union perf_event *event,
468 struct perf_session *session) 444 struct perf_sample *sample __used,
445 struct perf_session *session)
469{ 446{
470 dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n", 447 dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
471 self->lost.id, self->lost.lost); 448 event->lost.id, event->lost.lost);
472 session->hists.stats.total_lost += self->lost.lost; 449 session->hists.stats.total_lost += event->lost.lost;
473 return 0; 450 return 0;
474} 451}
475 452
476static void event_set_kernel_mmap_len(struct map **maps, event_t *self) 453static void perf_event__set_kernel_mmap_len(union perf_event *event,
454 struct map **maps)
477{ 455{
478 maps[MAP__FUNCTION]->start = self->mmap.start; 456 maps[MAP__FUNCTION]->start = event->mmap.start;
479 maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len; 457 maps[MAP__FUNCTION]->end = event->mmap.start + event->mmap.len;
480 /* 458 /*
481 * Be a bit paranoid here, some perf.data file came with 459 * Be a bit paranoid here, some perf.data file came with
482 * a zero sized synthesized MMAP event for the kernel. 460 * a zero sized synthesized MMAP event for the kernel.
@@ -485,8 +463,8 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
485 maps[MAP__FUNCTION]->end = ~0ULL; 463 maps[MAP__FUNCTION]->end = ~0ULL;
486} 464}
487 465
488static int event__process_kernel_mmap(event_t *self, 466static int perf_event__process_kernel_mmap(union perf_event *event,
489 struct perf_session *session) 467 struct perf_session *session)
490{ 468{
491 struct map *map; 469 struct map *map;
492 char kmmap_prefix[PATH_MAX]; 470 char kmmap_prefix[PATH_MAX];
@@ -494,9 +472,9 @@ static int event__process_kernel_mmap(event_t *self,
494 enum dso_kernel_type kernel_type; 472 enum dso_kernel_type kernel_type;
495 bool is_kernel_mmap; 473 bool is_kernel_mmap;
496 474
497 machine = perf_session__findnew_machine(session, self->mmap.pid); 475 machine = perf_session__findnew_machine(session, event->mmap.pid);
498 if (!machine) { 476 if (!machine) {
499 pr_err("Can't find id %d's machine\n", self->mmap.pid); 477 pr_err("Can't find id %d's machine\n", event->mmap.pid);
500 goto out_problem; 478 goto out_problem;
501 } 479 }
502 480
@@ -506,17 +484,17 @@ static int event__process_kernel_mmap(event_t *self,
506 else 484 else
507 kernel_type = DSO_TYPE_GUEST_KERNEL; 485 kernel_type = DSO_TYPE_GUEST_KERNEL;
508 486
509 is_kernel_mmap = memcmp(self->mmap.filename, 487 is_kernel_mmap = memcmp(event->mmap.filename,
510 kmmap_prefix, 488 kmmap_prefix,
511 strlen(kmmap_prefix)) == 0; 489 strlen(kmmap_prefix)) == 0;
512 if (self->mmap.filename[0] == '/' || 490 if (event->mmap.filename[0] == '/' ||
513 (!is_kernel_mmap && self->mmap.filename[0] == '[')) { 491 (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
514 492
515 char short_module_name[1024]; 493 char short_module_name[1024];
516 char *name, *dot; 494 char *name, *dot;
517 495
518 if (self->mmap.filename[0] == '/') { 496 if (event->mmap.filename[0] == '/') {
519 name = strrchr(self->mmap.filename, '/'); 497 name = strrchr(event->mmap.filename, '/');
520 if (name == NULL) 498 if (name == NULL)
521 goto out_problem; 499 goto out_problem;
522 500
@@ -528,10 +506,10 @@ static int event__process_kernel_mmap(event_t *self,
528 "[%.*s]", (int)(dot - name), name); 506 "[%.*s]", (int)(dot - name), name);
529 strxfrchar(short_module_name, '-', '_'); 507 strxfrchar(short_module_name, '-', '_');
530 } else 508 } else
531 strcpy(short_module_name, self->mmap.filename); 509 strcpy(short_module_name, event->mmap.filename);
532 510
533 map = machine__new_module(machine, self->mmap.start, 511 map = machine__new_module(machine, event->mmap.start,
534 self->mmap.filename); 512 event->mmap.filename);
535 if (map == NULL) 513 if (map == NULL)
536 goto out_problem; 514 goto out_problem;
537 515
@@ -541,9 +519,9 @@ static int event__process_kernel_mmap(event_t *self,
541 519
542 map->dso->short_name = name; 520 map->dso->short_name = name;
543 map->dso->sname_alloc = 1; 521 map->dso->sname_alloc = 1;
544 map->end = map->start + self->mmap.len; 522 map->end = map->start + event->mmap.len;
545 } else if (is_kernel_mmap) { 523 } else if (is_kernel_mmap) {
546 const char *symbol_name = (self->mmap.filename + 524 const char *symbol_name = (event->mmap.filename +
547 strlen(kmmap_prefix)); 525 strlen(kmmap_prefix));
548 /* 526 /*
549 * Should be there already, from the build-id table in 527 * Should be there already, from the build-id table in
@@ -558,10 +536,10 @@ static int event__process_kernel_mmap(event_t *self,
558 if (__machine__create_kernel_maps(machine, kernel) < 0) 536 if (__machine__create_kernel_maps(machine, kernel) < 0)
559 goto out_problem; 537 goto out_problem;
560 538
561 event_set_kernel_mmap_len(machine->vmlinux_maps, self); 539 perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps);
562 perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, 540 perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
563 symbol_name, 541 symbol_name,
564 self->mmap.pgoff); 542 event->mmap.pgoff);
565 if (machine__is_default_guest(machine)) { 543 if (machine__is_default_guest(machine)) {
566 /* 544 /*
567 * preload dso of guest kernel and modules 545 * preload dso of guest kernel and modules
@@ -575,22 +553,23 @@ out_problem:
575 return -1; 553 return -1;
576} 554}
577 555
578int event__process_mmap(event_t *self, struct sample_data *sample __used, 556int perf_event__process_mmap(union perf_event *event,
579 struct perf_session *session) 557 struct perf_sample *sample __used,
558 struct perf_session *session)
580{ 559{
581 struct machine *machine; 560 struct machine *machine;
582 struct thread *thread; 561 struct thread *thread;
583 struct map *map; 562 struct map *map;
584 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 563 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
585 int ret = 0; 564 int ret = 0;
586 565
587 dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n", 566 dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n",
588 self->mmap.pid, self->mmap.tid, self->mmap.start, 567 event->mmap.pid, event->mmap.tid, event->mmap.start,
589 self->mmap.len, self->mmap.pgoff, self->mmap.filename); 568 event->mmap.len, event->mmap.pgoff, event->mmap.filename);
590 569
591 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || 570 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
592 cpumode == PERF_RECORD_MISC_KERNEL) { 571 cpumode == PERF_RECORD_MISC_KERNEL) {
593 ret = event__process_kernel_mmap(self, session); 572 ret = perf_event__process_kernel_mmap(event, session);
594 if (ret < 0) 573 if (ret < 0)
595 goto out_problem; 574 goto out_problem;
596 return 0; 575 return 0;
@@ -599,12 +578,12 @@ int event__process_mmap(event_t *self, struct sample_data *sample __used,
599 machine = perf_session__find_host_machine(session); 578 machine = perf_session__find_host_machine(session);
600 if (machine == NULL) 579 if (machine == NULL)
601 goto out_problem; 580 goto out_problem;
602 thread = perf_session__findnew(session, self->mmap.pid); 581 thread = perf_session__findnew(session, event->mmap.pid);
603 if (thread == NULL) 582 if (thread == NULL)
604 goto out_problem; 583 goto out_problem;
605 map = map__new(&machine->user_dsos, self->mmap.start, 584 map = map__new(&machine->user_dsos, event->mmap.start,
606 self->mmap.len, self->mmap.pgoff, 585 event->mmap.len, event->mmap.pgoff,
607 self->mmap.pid, self->mmap.filename, 586 event->mmap.pid, event->mmap.filename,
608 MAP__FUNCTION); 587 MAP__FUNCTION);
609 if (map == NULL) 588 if (map == NULL)
610 goto out_problem; 589 goto out_problem;
@@ -617,16 +596,17 @@ out_problem:
617 return 0; 596 return 0;
618} 597}
619 598
620int event__process_task(event_t *self, struct sample_data *sample __used, 599int perf_event__process_task(union perf_event *event,
621 struct perf_session *session) 600 struct perf_sample *sample __used,
601 struct perf_session *session)
622{ 602{
623 struct thread *thread = perf_session__findnew(session, self->fork.tid); 603 struct thread *thread = perf_session__findnew(session, event->fork.tid);
624 struct thread *parent = perf_session__findnew(session, self->fork.ptid); 604 struct thread *parent = perf_session__findnew(session, event->fork.ptid);
625 605
626 dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid, 606 dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
627 self->fork.ppid, self->fork.ptid); 607 event->fork.ppid, event->fork.ptid);
628 608
629 if (self->header.type == PERF_RECORD_EXIT) { 609 if (event->header.type == PERF_RECORD_EXIT) {
630 perf_session__remove_thread(session, thread); 610 perf_session__remove_thread(session, thread);
631 return 0; 611 return 0;
632 } 612 }
@@ -640,20 +620,22 @@ int event__process_task(event_t *self, struct sample_data *sample __used,
640 return 0; 620 return 0;
641} 621}
642 622
643int event__process(event_t *event, struct sample_data *sample, 623int perf_event__process(union perf_event *event, struct perf_sample *sample,
644 struct perf_session *session) 624 struct perf_session *session)
645{ 625{
646 switch (event->header.type) { 626 switch (event->header.type) {
647 case PERF_RECORD_COMM: 627 case PERF_RECORD_COMM:
648 event__process_comm(event, sample, session); 628 perf_event__process_comm(event, sample, session);
649 break; 629 break;
650 case PERF_RECORD_MMAP: 630 case PERF_RECORD_MMAP:
651 event__process_mmap(event, sample, session); 631 perf_event__process_mmap(event, sample, session);
652 break; 632 break;
653 case PERF_RECORD_FORK: 633 case PERF_RECORD_FORK:
654 case PERF_RECORD_EXIT: 634 case PERF_RECORD_EXIT:
655 event__process_task(event, sample, session); 635 perf_event__process_task(event, sample, session);
656 break; 636 break;
637 case PERF_RECORD_LOST:
638 perf_event__process_lost(event, sample, session);
657 default: 639 default:
658 break; 640 break;
659 } 641 }
@@ -750,24 +732,14 @@ void thread__find_addr_location(struct thread *self,
750 al->sym = NULL; 732 al->sym = NULL;
751} 733}
752 734
753static void dso__calc_col_width(struct dso *self, struct hists *hists) 735int perf_event__preprocess_sample(const union perf_event *event,
754{ 736 struct perf_session *session,
755 if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep && 737 struct addr_location *al,
756 (!symbol_conf.dso_list || 738 struct perf_sample *sample,
757 strlist__has_entry(symbol_conf.dso_list, self->name))) { 739 symbol_filter_t filter)
758 u16 slen = dso__name_len(self);
759 hists__new_col_len(hists, HISTC_DSO, slen);
760 }
761
762 self->slen_calculated = 1;
763}
764
765int event__preprocess_sample(const event_t *self, struct perf_session *session,
766 struct addr_location *al, struct sample_data *data,
767 symbol_filter_t filter)
768{ 740{
769 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 741 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
770 struct thread *thread = perf_session__findnew(session, self->ip.pid); 742 struct thread *thread = perf_session__findnew(session, event->ip.pid);
771 743
772 if (thread == NULL) 744 if (thread == NULL)
773 return -1; 745 return -1;
@@ -789,12 +761,12 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
789 machine__create_kernel_maps(&session->host_machine); 761 machine__create_kernel_maps(&session->host_machine);
790 762
791 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 763 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
792 self->ip.pid, self->ip.ip, al); 764 event->ip.pid, event->ip.ip, al);
793 dump_printf(" ...... dso: %s\n", 765 dump_printf(" ...... dso: %s\n",
794 al->map ? al->map->dso->long_name : 766 al->map ? al->map->dso->long_name :
795 al->level == 'H' ? "[hypervisor]" : "<not found>"); 767 al->level == 'H' ? "[hypervisor]" : "<not found>");
796 al->sym = NULL; 768 al->sym = NULL;
797 al->cpu = data->cpu; 769 al->cpu = sample->cpu;
798 770
799 if (al->map) { 771 if (al->map) {
800 if (symbol_conf.dso_list && 772 if (symbol_conf.dso_list &&
@@ -805,23 +777,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
805 strlist__has_entry(symbol_conf.dso_list, 777 strlist__has_entry(symbol_conf.dso_list,
806 al->map->dso->long_name))))) 778 al->map->dso->long_name)))))
807 goto out_filtered; 779 goto out_filtered;
808 /*
809 * We have to do this here as we may have a dso with no symbol
810 * hit that has a name longer than the ones with symbols
811 * sampled.
812 */
813 if (!sort_dso.elide && !al->map->dso->slen_calculated)
814 dso__calc_col_width(al->map->dso, &session->hists);
815 780
816 al->sym = map__find_symbol(al->map, al->addr, filter); 781 al->sym = map__find_symbol(al->map, al->addr, filter);
817 } else {
818 const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
819
820 if (hists__col_len(&session->hists, HISTC_DSO) < unresolved_col_width &&
821 !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
822 !symbol_conf.dso_list)
823 hists__set_col_len(&session->hists, HISTC_DSO,
824 unresolved_col_width);
825 } 782 }
826 783
827 if (symbol_conf.sym_list && al->sym && 784 if (symbol_conf.sym_list && al->sym &&
@@ -834,128 +791,3 @@ out_filtered:
834 al->filtered = true; 791 al->filtered = true;
835 return 0; 792 return 0;
836} 793}
837
838static int event__parse_id_sample(const event_t *event,
839 struct perf_session *session,
840 struct sample_data *sample)
841{
842 const u64 *array;
843 u64 type;
844
845 sample->cpu = sample->pid = sample->tid = -1;
846 sample->stream_id = sample->id = sample->time = -1ULL;
847
848 if (!session->sample_id_all)
849 return 0;
850
851 array = event->sample.array;
852 array += ((event->header.size -
853 sizeof(event->header)) / sizeof(u64)) - 1;
854 type = session->sample_type;
855
856 if (type & PERF_SAMPLE_CPU) {
857 u32 *p = (u32 *)array;
858 sample->cpu = *p;
859 array--;
860 }
861
862 if (type & PERF_SAMPLE_STREAM_ID) {
863 sample->stream_id = *array;
864 array--;
865 }
866
867 if (type & PERF_SAMPLE_ID) {
868 sample->id = *array;
869 array--;
870 }
871
872 if (type & PERF_SAMPLE_TIME) {
873 sample->time = *array;
874 array--;
875 }
876
877 if (type & PERF_SAMPLE_TID) {
878 u32 *p = (u32 *)array;
879 sample->pid = p[0];
880 sample->tid = p[1];
881 }
882
883 return 0;
884}
885
886int event__parse_sample(const event_t *event, struct perf_session *session,
887 struct sample_data *data)
888{
889 const u64 *array;
890 u64 type;
891
892 if (event->header.type != PERF_RECORD_SAMPLE)
893 return event__parse_id_sample(event, session, data);
894
895 array = event->sample.array;
896 type = session->sample_type;
897
898 if (type & PERF_SAMPLE_IP) {
899 data->ip = event->ip.ip;
900 array++;
901 }
902
903 if (type & PERF_SAMPLE_TID) {
904 u32 *p = (u32 *)array;
905 data->pid = p[0];
906 data->tid = p[1];
907 array++;
908 }
909
910 if (type & PERF_SAMPLE_TIME) {
911 data->time = *array;
912 array++;
913 }
914
915 if (type & PERF_SAMPLE_ADDR) {
916 data->addr = *array;
917 array++;
918 }
919
920 data->id = -1ULL;
921 if (type & PERF_SAMPLE_ID) {
922 data->id = *array;
923 array++;
924 }
925
926 if (type & PERF_SAMPLE_STREAM_ID) {
927 data->stream_id = *array;
928 array++;
929 }
930
931 if (type & PERF_SAMPLE_CPU) {
932 u32 *p = (u32 *)array;
933 data->cpu = *p;
934 array++;
935 } else
936 data->cpu = -1;
937
938 if (type & PERF_SAMPLE_PERIOD) {
939 data->period = *array;
940 array++;
941 }
942
943 if (type & PERF_SAMPLE_READ) {
944 pr_debug("PERF_SAMPLE_READ is unsuported for now\n");
945 return -1;
946 }
947
948 if (type & PERF_SAMPLE_CALLCHAIN) {
949 data->callchain = (struct ip_callchain *)array;
950 array += 1 + data->callchain->nr;
951 }
952
953 if (type & PERF_SAMPLE_RAW) {
954 u32 *p = (u32 *)array;
955 data->raw_size = *p;
956 p++;
957 data->raw_data = p;
958 }
959
960 return 0;
961}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index cc7b52f9b492..9c35170fb379 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -61,7 +61,7 @@ struct sample_event {
61 u64 array[]; 61 u64 array[];
62}; 62};
63 63
64struct sample_data { 64struct perf_sample {
65 u64 ip; 65 u64 ip;
66 u32 pid, tid; 66 u32 pid, tid;
67 u64 time; 67 u64 time;
@@ -117,7 +117,7 @@ struct tracing_data_event {
117 u32 size; 117 u32 size;
118}; 118};
119 119
120typedef union event_union { 120union perf_event {
121 struct perf_event_header header; 121 struct perf_event_header header;
122 struct ip_event ip; 122 struct ip_event ip;
123 struct mmap_event mmap; 123 struct mmap_event mmap;
@@ -130,50 +130,54 @@ typedef union event_union {
130 struct event_type_event event_type; 130 struct event_type_event event_type;
131 struct tracing_data_event tracing_data; 131 struct tracing_data_event tracing_data;
132 struct build_id_event build_id; 132 struct build_id_event build_id;
133} event_t; 133};
134 134
135void event__print_totals(void); 135void perf_event__print_totals(void);
136 136
137struct perf_session; 137struct perf_session;
138struct thread_map; 138struct thread_map;
139 139
140typedef int (*event__handler_synth_t)(event_t *event, 140typedef int (*perf_event__handler_synth_t)(union perf_event *event,
141 struct perf_session *session);
142typedef int (*perf_event__handler_t)(union perf_event *event,
143 struct perf_sample *sample,
141 struct perf_session *session); 144 struct perf_session *session);
142typedef int (*event__handler_t)(event_t *event, struct sample_data *sample, 145
143 struct perf_session *session); 146int perf_event__synthesize_thread_map(struct thread_map *threads,
144 147 perf_event__handler_t process,
145int event__synthesize_thread_map(struct thread_map *threads, 148 struct perf_session *session);
146 event__handler_t process, 149int perf_event__synthesize_threads(perf_event__handler_t process,
147 struct perf_session *session); 150 struct perf_session *session);
148int event__synthesize_threads(event__handler_t process, 151int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
149 struct perf_session *session); 152 struct perf_session *session,
150int event__synthesize_kernel_mmap(event__handler_t process, 153 struct machine *machine,
151 struct perf_session *session, 154 const char *symbol_name);
152 struct machine *machine, 155
153 const char *symbol_name); 156int perf_event__synthesize_modules(perf_event__handler_t process,
154 157 struct perf_session *session,
155int event__synthesize_modules(event__handler_t process, 158 struct machine *machine);
156 struct perf_session *session, 159
157 struct machine *machine); 160int perf_event__process_comm(union perf_event *event, struct perf_sample *sample,
158 161 struct perf_session *session);
159int event__process_comm(event_t *self, struct sample_data *sample, 162int perf_event__process_lost(union perf_event *event, struct perf_sample *sample,
160 struct perf_session *session); 163 struct perf_session *session);
161int event__process_lost(event_t *self, struct sample_data *sample, 164int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample,
162 struct perf_session *session); 165 struct perf_session *session);
163int event__process_mmap(event_t *self, struct sample_data *sample, 166int perf_event__process_task(union perf_event *event, struct perf_sample *sample,
164 struct perf_session *session); 167 struct perf_session *session);
165int event__process_task(event_t *self, struct sample_data *sample, 168int perf_event__process(union perf_event *event, struct perf_sample *sample,
166 struct perf_session *session); 169 struct perf_session *session);
167int event__process(event_t *event, struct sample_data *sample,
168 struct perf_session *session);
169 170
170struct addr_location; 171struct addr_location;
171int event__preprocess_sample(const event_t *self, struct perf_session *session, 172int perf_event__preprocess_sample(const union perf_event *self,
172 struct addr_location *al, struct sample_data *data, 173 struct perf_session *session,
173 symbol_filter_t filter); 174 struct addr_location *al,
174int event__parse_sample(const event_t *event, struct perf_session *session, 175 struct perf_sample *sample,
175 struct sample_data *sample); 176 symbol_filter_t filter);
177
178const char *perf_event__name(unsigned int id);
176 179
177const char *event__get_event_name(unsigned int id); 180int perf_event__parse_sample(const union perf_event *event, u64 type,
181 bool sample_id_all, struct perf_sample *sample);
178 182
179#endif /* __PERF_RECORD_H */ 183#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
new file mode 100644
index 000000000000..d852cefa20de
--- /dev/null
+++ b/tools/perf/util/evlist.c
@@ -0,0 +1,394 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9#include <poll.h>
10#include "cpumap.h"
11#include "thread_map.h"
12#include "evlist.h"
13#include "evsel.h"
14#include "util.h"
15
16#include <sys/mman.h>
17
18#include <linux/bitops.h>
19#include <linux/hash.h>
20
21#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
22#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
23
24void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
25 struct thread_map *threads)
26{
27 int i;
28
29 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
30 INIT_HLIST_HEAD(&evlist->heads[i]);
31 INIT_LIST_HEAD(&evlist->entries);
32 perf_evlist__set_maps(evlist, cpus, threads);
33}
34
35struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
36 struct thread_map *threads)
37{
38 struct perf_evlist *evlist = zalloc(sizeof(*evlist));
39
40 if (evlist != NULL)
41 perf_evlist__init(evlist, cpus, threads);
42
43 return evlist;
44}
45
46static void perf_evlist__purge(struct perf_evlist *evlist)
47{
48 struct perf_evsel *pos, *n;
49
50 list_for_each_entry_safe(pos, n, &evlist->entries, node) {
51 list_del_init(&pos->node);
52 perf_evsel__delete(pos);
53 }
54
55 evlist->nr_entries = 0;
56}
57
58void perf_evlist__exit(struct perf_evlist *evlist)
59{
60 free(evlist->mmap);
61 free(evlist->pollfd);
62 evlist->mmap = NULL;
63 evlist->pollfd = NULL;
64}
65
66void perf_evlist__delete(struct perf_evlist *evlist)
67{
68 perf_evlist__purge(evlist);
69 perf_evlist__exit(evlist);
70 free(evlist);
71}
72
73void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
74{
75 list_add_tail(&entry->node, &evlist->entries);
76 ++evlist->nr_entries;
77}
78
79int perf_evlist__add_default(struct perf_evlist *evlist)
80{
81 struct perf_event_attr attr = {
82 .type = PERF_TYPE_HARDWARE,
83 .config = PERF_COUNT_HW_CPU_CYCLES,
84 };
85 struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
86
87 if (evsel == NULL)
88 return -ENOMEM;
89
90 perf_evlist__add(evlist, evsel);
91 return 0;
92}
93
94int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
95{
96 int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
97 evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
98 return evlist->pollfd != NULL ? 0 : -ENOMEM;
99}
100
101void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
102{
103 fcntl(fd, F_SETFL, O_NONBLOCK);
104 evlist->pollfd[evlist->nr_fds].fd = fd;
105 evlist->pollfd[evlist->nr_fds].events = POLLIN;
106 evlist->nr_fds++;
107}
108
109static void perf_evlist__id_hash(struct perf_evlist *evlist,
110 struct perf_evsel *evsel,
111 int cpu, int thread, u64 id)
112{
113 int hash;
114 struct perf_sample_id *sid = SID(evsel, cpu, thread);
115
116 sid->id = id;
117 sid->evsel = evsel;
118 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
119 hlist_add_head(&sid->node, &evlist->heads[hash]);
120}
121
122void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
123 int cpu, int thread, u64 id)
124{
125 perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
126 evsel->id[evsel->ids++] = id;
127}
128
129static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
130 struct perf_evsel *evsel,
131 int cpu, int thread, int fd)
132{
133 u64 read_data[4] = { 0, };
134 int id_idx = 1; /* The first entry is the counter value */
135
136 if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
137 read(fd, &read_data, sizeof(read_data)) == -1)
138 return -1;
139
140 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
141 ++id_idx;
142 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
143 ++id_idx;
144
145 perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]);
146 return 0;
147}
148
149struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
150{
151 struct hlist_head *head;
152 struct hlist_node *pos;
153 struct perf_sample_id *sid;
154 int hash;
155
156 if (evlist->nr_entries == 1)
157 return list_entry(evlist->entries.next, struct perf_evsel, node);
158
159 hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
160 head = &evlist->heads[hash];
161
162 hlist_for_each_entry(sid, pos, head, node)
163 if (sid->id == id)
164 return sid->evsel;
165 return NULL;
166}
167
168union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu)
169{
170 /* XXX Move this to perf.c, making it generally available */
171 unsigned int page_size = sysconf(_SC_PAGE_SIZE);
172 struct perf_mmap *md = &evlist->mmap[cpu];
173 unsigned int head = perf_mmap__read_head(md);
174 unsigned int old = md->prev;
175 unsigned char *data = md->base + page_size;
176 union perf_event *event = NULL;
177
178 if (evlist->overwrite) {
179 /*
180 * If we're further behind than half the buffer, there's a chance
181 * the writer will bite our tail and mess up the samples under us.
182 *
183 * If we somehow ended up ahead of the head, we got messed up.
184 *
185 * In either case, truncate and restart at head.
186 */
187 int diff = head - old;
188 if (diff > md->mask / 2 || diff < 0) {
189 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
190
191 /*
192 * head points to a known good entry, start there.
193 */
194 old = head;
195 }
196 }
197
198 if (old != head) {
199 size_t size;
200
201 event = (union perf_event *)&data[old & md->mask];
202 size = event->header.size;
203
204 /*
205 * Event straddles the mmap boundary -- header should always
206 * be inside due to u64 alignment of output.
207 */
208 if ((old & md->mask) + size != ((old + size) & md->mask)) {
209 unsigned int offset = old;
210 unsigned int len = min(sizeof(*event), size), cpy;
211 void *dst = &evlist->event_copy;
212
213 do {
214 cpy = min(md->mask + 1 - (offset & md->mask), len);
215 memcpy(dst, &data[offset & md->mask], cpy);
216 offset += cpy;
217 dst += cpy;
218 len -= cpy;
219 } while (len);
220
221 event = &evlist->event_copy;
222 }
223
224 old += size;
225 }
226
227 md->prev = old;
228
229 if (!evlist->overwrite)
230 perf_mmap__write_tail(md, old);
231
232 return event;
233}
234
235void perf_evlist__munmap(struct perf_evlist *evlist)
236{
237 int cpu;
238
239 for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
240 if (evlist->mmap[cpu].base != NULL) {
241 munmap(evlist->mmap[cpu].base, evlist->mmap_len);
242 evlist->mmap[cpu].base = NULL;
243 }
244 }
245}
246
247int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
248{
249 evlist->mmap = zalloc(evlist->cpus->nr * sizeof(struct perf_mmap));
250 return evlist->mmap != NULL ? 0 : -ENOMEM;
251}
252
253static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot,
254 int mask, int fd)
255{
256 evlist->mmap[cpu].prev = 0;
257 evlist->mmap[cpu].mask = mask;
258 evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot,
259 MAP_SHARED, fd, 0);
260 if (evlist->mmap[cpu].base == MAP_FAILED)
261 return -1;
262
263 perf_evlist__add_pollfd(evlist, fd);
264 return 0;
265}
266
267/** perf_evlist__mmap - Create per cpu maps to receive events
268 *
269 * @evlist - list of events
270 * @pages - map length in pages
271 * @overwrite - overwrite older events?
272 *
273 * If overwrite is false the user needs to signal event consuption using:
274 *
275 * struct perf_mmap *m = &evlist->mmap[cpu];
276 * unsigned int head = perf_mmap__read_head(m);
277 *
278 * perf_mmap__write_tail(m, head)
279 *
280 * Using perf_evlist__read_on_cpu does this automatically.
281 */
282int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite)
283{
284 unsigned int page_size = sysconf(_SC_PAGE_SIZE);
285 int mask = pages * page_size - 1, cpu;
286 struct perf_evsel *first_evsel, *evsel;
287 const struct cpu_map *cpus = evlist->cpus;
288 const struct thread_map *threads = evlist->threads;
289 int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE);
290
291 if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
292 return -ENOMEM;
293
294 if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
295 return -ENOMEM;
296
297 evlist->overwrite = overwrite;
298 evlist->mmap_len = (pages + 1) * page_size;
299 first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
300
301 list_for_each_entry(evsel, &evlist->entries, node) {
302 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
303 evsel->sample_id == NULL &&
304 perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
305 return -ENOMEM;
306
307 for (cpu = 0; cpu < cpus->nr; cpu++) {
308 for (thread = 0; thread < threads->nr; thread++) {
309 int fd = FD(evsel, cpu, thread);
310
311 if (evsel->idx || thread) {
312 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
313 FD(first_evsel, cpu, 0)) != 0)
314 goto out_unmap;
315 } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0)
316 goto out_unmap;
317
318 if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
319 perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
320 goto out_unmap;
321 }
322 }
323 }
324
325 return 0;
326
327out_unmap:
328 for (cpu = 0; cpu < cpus->nr; cpu++) {
329 if (evlist->mmap[cpu].base != NULL) {
330 munmap(evlist->mmap[cpu].base, evlist->mmap_len);
331 evlist->mmap[cpu].base = NULL;
332 }
333 }
334 return -1;
335}
336
337int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
338 pid_t target_tid, const char *cpu_list)
339{
340 evlist->threads = thread_map__new(target_pid, target_tid);
341
342 if (evlist->threads == NULL)
343 return -1;
344
345 if (target_tid != -1)
346 evlist->cpus = cpu_map__dummy_new();
347 else
348 evlist->cpus = cpu_map__new(cpu_list);
349
350 if (evlist->cpus == NULL)
351 goto out_delete_threads;
352
353 return 0;
354
355out_delete_threads:
356 thread_map__delete(evlist->threads);
357 return -1;
358}
359
360void perf_evlist__delete_maps(struct perf_evlist *evlist)
361{
362 cpu_map__delete(evlist->cpus);
363 thread_map__delete(evlist->threads);
364 evlist->cpus = NULL;
365 evlist->threads = NULL;
366}
367
368int perf_evlist__set_filters(struct perf_evlist *evlist)
369{
370 const struct thread_map *threads = evlist->threads;
371 const struct cpu_map *cpus = evlist->cpus;
372 struct perf_evsel *evsel;
373 char *filter;
374 int thread;
375 int cpu;
376 int err;
377 int fd;
378
379 list_for_each_entry(evsel, &evlist->entries, node) {
380 filter = evsel->filter;
381 if (!filter)
382 continue;
383 for (cpu = 0; cpu < cpus->nr; cpu++) {
384 for (thread = 0; thread < threads->nr; thread++) {
385 fd = FD(evsel, cpu, thread);
386 err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);
387 if (err)
388 return err;
389 }
390 }
391 }
392
393 return 0;
394}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
new file mode 100644
index 000000000000..8b1cb7a4c5f1
--- /dev/null
+++ b/tools/perf/util/evlist.h
@@ -0,0 +1,68 @@
1#ifndef __PERF_EVLIST_H
2#define __PERF_EVLIST_H 1
3
4#include <linux/list.h>
5#include "../perf.h"
6#include "event.h"
7
8struct pollfd;
9struct thread_map;
10struct cpu_map;
11
12#define PERF_EVLIST__HLIST_BITS 8
13#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
14
15struct perf_evlist {
16 struct list_head entries;
17 struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
18 int nr_entries;
19 int nr_fds;
20 int mmap_len;
21 bool overwrite;
22 union perf_event event_copy;
23 struct perf_mmap *mmap;
24 struct pollfd *pollfd;
25 struct thread_map *threads;
26 struct cpu_map *cpus;
27};
28
29struct perf_evsel;
30
31struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
32 struct thread_map *threads);
33void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
34 struct thread_map *threads);
35void perf_evlist__exit(struct perf_evlist *evlist);
36void perf_evlist__delete(struct perf_evlist *evlist);
37
38void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
39int perf_evlist__add_default(struct perf_evlist *evlist);
40
41void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
42 int cpu, int thread, u64 id);
43
44int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
45void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
46
47struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
48
49union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu);
50
51int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
52int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
53void perf_evlist__munmap(struct perf_evlist *evlist);
54
55static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
56 struct cpu_map *cpus,
57 struct thread_map *threads)
58{
59 evlist->cpus = cpus;
60 evlist->threads = threads;
61}
62
63int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
64 pid_t target_tid, const char *cpu_list);
65void perf_evlist__delete_maps(struct perf_evlist *evlist);
66int perf_evlist__set_filters(struct perf_evlist *evlist);
67
68#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d8575d31ee6c..662596afd7f1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1,20 +1,34 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9
1#include "evsel.h" 10#include "evsel.h"
2#include "../perf.h" 11#include "evlist.h"
3#include "util.h" 12#include "util.h"
4#include "cpumap.h" 13#include "cpumap.h"
5#include "thread.h" 14#include "thread_map.h"
6 15
7#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 16#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
8 17
18void perf_evsel__init(struct perf_evsel *evsel,
19 struct perf_event_attr *attr, int idx)
20{
21 evsel->idx = idx;
22 evsel->attr = *attr;
23 INIT_LIST_HEAD(&evsel->node);
24}
25
9struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) 26struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
10{ 27{
11 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 28 struct perf_evsel *evsel = zalloc(sizeof(*evsel));
12 29
13 if (evsel != NULL) { 30 if (evsel != NULL)
14 evsel->idx = idx; 31 perf_evsel__init(evsel, attr, idx);
15 evsel->attr = *attr;
16 INIT_LIST_HEAD(&evsel->node);
17 }
18 32
19 return evsel; 33 return evsel;
20} 34}
@@ -25,6 +39,22 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
25 return evsel->fd != NULL ? 0 : -ENOMEM; 39 return evsel->fd != NULL ? 0 : -ENOMEM;
26} 40}
27 41
42int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
43{
44 evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
45 if (evsel->sample_id == NULL)
46 return -ENOMEM;
47
48 evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
49 if (evsel->id == NULL) {
50 xyarray__delete(evsel->sample_id);
51 evsel->sample_id = NULL;
52 return -ENOMEM;
53 }
54
55 return 0;
56}
57
28int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) 58int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
29{ 59{
30 evsel->counts = zalloc((sizeof(*evsel->counts) + 60 evsel->counts = zalloc((sizeof(*evsel->counts) +
@@ -38,6 +68,14 @@ void perf_evsel__free_fd(struct perf_evsel *evsel)
38 evsel->fd = NULL; 68 evsel->fd = NULL;
39} 69}
40 70
71void perf_evsel__free_id(struct perf_evsel *evsel)
72{
73 xyarray__delete(evsel->sample_id);
74 evsel->sample_id = NULL;
75 free(evsel->id);
76 evsel->id = NULL;
77}
78
41void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 79void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
42{ 80{
43 int cpu, thread; 81 int cpu, thread;
@@ -49,10 +87,19 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
49 } 87 }
50} 88}
51 89
52void perf_evsel__delete(struct perf_evsel *evsel) 90void perf_evsel__exit(struct perf_evsel *evsel)
53{ 91{
54 assert(list_empty(&evsel->node)); 92 assert(list_empty(&evsel->node));
55 xyarray__delete(evsel->fd); 93 xyarray__delete(evsel->fd);
94 xyarray__delete(evsel->sample_id);
95 free(evsel->id);
96}
97
98void perf_evsel__delete(struct perf_evsel *evsel)
99{
100 perf_evsel__exit(evsel);
101 close_cgroup(evsel->cgrp);
102 free(evsel->name);
56 free(evsel); 103 free(evsel);
57} 104}
58 105
@@ -128,21 +175,51 @@ int __perf_evsel__read(struct perf_evsel *evsel,
128} 175}
129 176
130static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 177static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
131 struct thread_map *threads) 178 struct thread_map *threads, bool group, bool inherit)
132{ 179{
133 int cpu, thread; 180 int cpu, thread;
181 unsigned long flags = 0;
182 int pid = -1;
134 183
135 if (evsel->fd == NULL && 184 if (evsel->fd == NULL &&
136 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) 185 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
137 return -1; 186 return -1;
138 187
188 if (evsel->cgrp) {
189 flags = PERF_FLAG_PID_CGROUP;
190 pid = evsel->cgrp->fd;
191 }
192
139 for (cpu = 0; cpu < cpus->nr; cpu++) { 193 for (cpu = 0; cpu < cpus->nr; cpu++) {
194 int group_fd = -1;
195 /*
196 * Don't allow mmap() of inherited per-task counters. This
197 * would create a performance issue due to all children writing
198 * to the same buffer.
199 *
200 * FIXME:
201 * Proper fix is not to pass 'inherit' to perf_evsel__open*,
202 * but a 'flags' parameter, with 'group' folded there as well,
203 * then introduce a PERF_O_{MMAP,GROUP,INHERIT} enum, and if
204 * O_MMAP is set, emit a warning if cpu < 0 and O_INHERIT is
205 * set. Lets go for the minimal fix first tho.
206 */
207 evsel->attr.inherit = (cpus->map[cpu] >= 0) && inherit;
208
140 for (thread = 0; thread < threads->nr; thread++) { 209 for (thread = 0; thread < threads->nr; thread++) {
210
211 if (!evsel->cgrp)
212 pid = threads->map[thread];
213
141 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 214 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
142 threads->map[thread], 215 pid,
143 cpus->map[cpu], -1, 0); 216 cpus->map[cpu],
217 group_fd, flags);
144 if (FD(evsel, cpu, thread) < 0) 218 if (FD(evsel, cpu, thread) < 0)
145 goto out_close; 219 goto out_close;
220
221 if (group && group_fd == -1)
222 group_fd = FD(evsel, cpu, thread);
146 } 223 }
147 } 224 }
148 225
@@ -175,10 +252,9 @@ static struct {
175 .threads = { -1, }, 252 .threads = { -1, },
176}; 253};
177 254
178int perf_evsel__open(struct perf_evsel *evsel, 255int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
179 struct cpu_map *cpus, struct thread_map *threads) 256 struct thread_map *threads, bool group, bool inherit)
180{ 257{
181
182 if (cpus == NULL) { 258 if (cpus == NULL) {
183 /* Work around old compiler warnings about strict aliasing */ 259 /* Work around old compiler warnings about strict aliasing */
184 cpus = &empty_cpu_map.map; 260 cpus = &empty_cpu_map.map;
@@ -187,15 +263,135 @@ int perf_evsel__open(struct perf_evsel *evsel,
187 if (threads == NULL) 263 if (threads == NULL)
188 threads = &empty_thread_map.map; 264 threads = &empty_thread_map.map;
189 265
190 return __perf_evsel__open(evsel, cpus, threads); 266 return __perf_evsel__open(evsel, cpus, threads, group, inherit);
191} 267}
192 268
193int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus) 269int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
270 struct cpu_map *cpus, bool group, bool inherit)
194{ 271{
195 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map); 272 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, inherit);
273}
274
275int perf_evsel__open_per_thread(struct perf_evsel *evsel,
276 struct thread_map *threads, bool group, bool inherit)
277{
278 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit);
279}
280
281static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
282 struct perf_sample *sample)
283{
284 const u64 *array = event->sample.array;
285
286 array += ((event->header.size -
287 sizeof(event->header)) / sizeof(u64)) - 1;
288
289 if (type & PERF_SAMPLE_CPU) {
290 u32 *p = (u32 *)array;
291 sample->cpu = *p;
292 array--;
293 }
294
295 if (type & PERF_SAMPLE_STREAM_ID) {
296 sample->stream_id = *array;
297 array--;
298 }
299
300 if (type & PERF_SAMPLE_ID) {
301 sample->id = *array;
302 array--;
303 }
304
305 if (type & PERF_SAMPLE_TIME) {
306 sample->time = *array;
307 array--;
308 }
309
310 if (type & PERF_SAMPLE_TID) {
311 u32 *p = (u32 *)array;
312 sample->pid = p[0];
313 sample->tid = p[1];
314 }
315
316 return 0;
196} 317}
197 318
198int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads) 319int perf_event__parse_sample(const union perf_event *event, u64 type,
320 bool sample_id_all, struct perf_sample *data)
199{ 321{
200 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); 322 const u64 *array;
323
324 data->cpu = data->pid = data->tid = -1;
325 data->stream_id = data->id = data->time = -1ULL;
326
327 if (event->header.type != PERF_RECORD_SAMPLE) {
328 if (!sample_id_all)
329 return 0;
330 return perf_event__parse_id_sample(event, type, data);
331 }
332
333 array = event->sample.array;
334
335 if (type & PERF_SAMPLE_IP) {
336 data->ip = event->ip.ip;
337 array++;
338 }
339
340 if (type & PERF_SAMPLE_TID) {
341 u32 *p = (u32 *)array;
342 data->pid = p[0];
343 data->tid = p[1];
344 array++;
345 }
346
347 if (type & PERF_SAMPLE_TIME) {
348 data->time = *array;
349 array++;
350 }
351
352 if (type & PERF_SAMPLE_ADDR) {
353 data->addr = *array;
354 array++;
355 }
356
357 data->id = -1ULL;
358 if (type & PERF_SAMPLE_ID) {
359 data->id = *array;
360 array++;
361 }
362
363 if (type & PERF_SAMPLE_STREAM_ID) {
364 data->stream_id = *array;
365 array++;
366 }
367
368 if (type & PERF_SAMPLE_CPU) {
369 u32 *p = (u32 *)array;
370 data->cpu = *p;
371 array++;
372 }
373
374 if (type & PERF_SAMPLE_PERIOD) {
375 data->period = *array;
376 array++;
377 }
378
379 if (type & PERF_SAMPLE_READ) {
380 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
381 return -1;
382 }
383
384 if (type & PERF_SAMPLE_CALLCHAIN) {
385 data->callchain = (struct ip_callchain *)array;
386 array += 1 + data->callchain->nr;
387 }
388
389 if (type & PERF_SAMPLE_RAW) {
390 u32 *p = (u32 *)array;
391 data->raw_size = *p;
392 p++;
393 data->raw_data = p;
394 }
395
396 return 0;
201} 397}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b2d755fe88a5..6710ab538342 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -6,6 +6,8 @@
6#include "../../../include/linux/perf_event.h" 6#include "../../../include/linux/perf_event.h"
7#include "types.h" 7#include "types.h"
8#include "xyarray.h" 8#include "xyarray.h"
9#include "cgroup.h"
10#include "hist.h"
9 11
10struct perf_counts_values { 12struct perf_counts_values {
11 union { 13 union {
@@ -24,31 +26,66 @@ struct perf_counts {
24 struct perf_counts_values cpu[]; 26 struct perf_counts_values cpu[];
25}; 27};
26 28
29struct perf_evsel;
30
31/*
32 * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
33 * more than one entry in the evlist.
34 */
35struct perf_sample_id {
36 struct hlist_node node;
37 u64 id;
38 struct perf_evsel *evsel;
39};
40
41/** struct perf_evsel - event selector
42 *
43 * @name - Can be set to retain the original event name passed by the user,
44 * so that when showing results in tools such as 'perf stat', we
45 * show the name used, not some alias.
46 */
27struct perf_evsel { 47struct perf_evsel {
28 struct list_head node; 48 struct list_head node;
29 struct perf_event_attr attr; 49 struct perf_event_attr attr;
30 char *filter; 50 char *filter;
31 struct xyarray *fd; 51 struct xyarray *fd;
52 struct xyarray *sample_id;
53 u64 *id;
32 struct perf_counts *counts; 54 struct perf_counts *counts;
33 int idx; 55 int idx;
34 void *priv; 56 int ids;
57 struct hists hists;
58 char *name;
59 union {
60 void *priv;
61 off_t id_offset;
62 };
63 struct cgroup_sel *cgrp;
35}; 64};
36 65
37struct cpu_map; 66struct cpu_map;
38struct thread_map; 67struct thread_map;
68struct perf_evlist;
39 69
40struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); 70struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
71void perf_evsel__init(struct perf_evsel *evsel,
72 struct perf_event_attr *attr, int idx);
73void perf_evsel__exit(struct perf_evsel *evsel);
41void perf_evsel__delete(struct perf_evsel *evsel); 74void perf_evsel__delete(struct perf_evsel *evsel);
42 75
43int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 76int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
77int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
44int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); 78int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
45void perf_evsel__free_fd(struct perf_evsel *evsel); 79void perf_evsel__free_fd(struct perf_evsel *evsel);
80void perf_evsel__free_id(struct perf_evsel *evsel);
46void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); 81void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
47 82
48int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus); 83int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
49int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads); 84 struct cpu_map *cpus, bool group, bool inherit);
50int perf_evsel__open(struct perf_evsel *evsel, 85int perf_evsel__open_per_thread(struct perf_evsel *evsel,
51 struct cpu_map *cpus, struct thread_map *threads); 86 struct thread_map *threads, bool group, bool inherit);
87int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
88 struct thread_map *threads, bool group, bool inherit);
52 89
53#define perf_evsel__match(evsel, t, c) \ 90#define perf_evsel__match(evsel, t, c) \
54 (evsel->attr.type == PERF_TYPE_##t && \ 91 (evsel->attr.type == PERF_TYPE_##t && \
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c
index 67eeff571568..7adf4ad15d8f 100644
--- a/tools/perf/util/exec_cmd.c
+++ b/tools/perf/util/exec_cmd.c
@@ -11,31 +11,12 @@ static const char *argv0_path;
11 11
12const char *system_path(const char *path) 12const char *system_path(const char *path)
13{ 13{
14#ifdef RUNTIME_PREFIX
15 static const char *prefix;
16#else
17 static const char *prefix = PREFIX; 14 static const char *prefix = PREFIX;
18#endif
19 struct strbuf d = STRBUF_INIT; 15 struct strbuf d = STRBUF_INIT;
20 16
21 if (is_absolute_path(path)) 17 if (is_absolute_path(path))
22 return path; 18 return path;
23 19
24#ifdef RUNTIME_PREFIX
25 assert(argv0_path);
26 assert(is_absolute_path(argv0_path));
27
28 if (!prefix &&
29 !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
30 !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
31 !(prefix = strip_path_suffix(argv0_path, "perf"))) {
32 prefix = PREFIX;
33 fprintf(stderr, "RUNTIME_PREFIX requested, "
34 "but prefix computation failed. "
35 "Using static fallback '%s'.\n", prefix);
36 }
37#endif
38
39 strbuf_addf(&d, "%s/%s", prefix, path); 20 strbuf_addf(&d, "%s/%s", prefix, path);
40 path = strbuf_detach(&d, NULL); 21 path = strbuf_detach(&d, NULL);
41 return path; 22 return path;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f6a929e74981..e5230c0ef95b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,8 @@
8#include <linux/list.h> 8#include <linux/list.h>
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10 10
11#include "evlist.h"
12#include "evsel.h"
11#include "util.h" 13#include "util.h"
12#include "header.h" 14#include "header.h"
13#include "../perf.h" 15#include "../perf.h"
@@ -18,89 +20,6 @@
18 20
19static bool no_buildid_cache = false; 21static bool no_buildid_cache = false;
20 22
21/*
22 * Create new perf.data header attribute:
23 */
24struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr)
25{
26 struct perf_header_attr *self = malloc(sizeof(*self));
27
28 if (self != NULL) {
29 self->attr = *attr;
30 self->ids = 0;
31 self->size = 1;
32 self->id = malloc(sizeof(u64));
33 if (self->id == NULL) {
34 free(self);
35 self = NULL;
36 }
37 }
38
39 return self;
40}
41
42void perf_header_attr__delete(struct perf_header_attr *self)
43{
44 free(self->id);
45 free(self);
46}
47
48int perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
49{
50 int pos = self->ids;
51
52 self->ids++;
53 if (self->ids > self->size) {
54 int nsize = self->size * 2;
55 u64 *nid = realloc(self->id, nsize * sizeof(u64));
56
57 if (nid == NULL)
58 return -1;
59
60 self->size = nsize;
61 self->id = nid;
62 }
63 self->id[pos] = id;
64 return 0;
65}
66
67int perf_header__init(struct perf_header *self)
68{
69 self->size = 1;
70 self->attr = malloc(sizeof(void *));
71 return self->attr == NULL ? -ENOMEM : 0;
72}
73
74void perf_header__exit(struct perf_header *self)
75{
76 int i;
77 for (i = 0; i < self->attrs; ++i)
78 perf_header_attr__delete(self->attr[i]);
79 free(self->attr);
80}
81
82int perf_header__add_attr(struct perf_header *self,
83 struct perf_header_attr *attr)
84{
85 if (self->frozen)
86 return -1;
87
88 if (self->attrs == self->size) {
89 int nsize = self->size * 2;
90 struct perf_header_attr **nattr;
91
92 nattr = realloc(self->attr, nsize * sizeof(void *));
93 if (nattr == NULL)
94 return -1;
95
96 self->size = nsize;
97 self->attr = nattr;
98 }
99
100 self->attr[self->attrs++] = attr;
101 return 0;
102}
103
104static int event_count; 23static int event_count;
105static struct perf_trace_event_type *events; 24static struct perf_trace_event_type *events;
106 25
@@ -147,19 +66,19 @@ struct perf_file_attr {
147 struct perf_file_section ids; 66 struct perf_file_section ids;
148}; 67};
149 68
150void perf_header__set_feat(struct perf_header *self, int feat) 69void perf_header__set_feat(struct perf_header *header, int feat)
151{ 70{
152 set_bit(feat, self->adds_features); 71 set_bit(feat, header->adds_features);
153} 72}
154 73
155void perf_header__clear_feat(struct perf_header *self, int feat) 74void perf_header__clear_feat(struct perf_header *header, int feat)
156{ 75{
157 clear_bit(feat, self->adds_features); 76 clear_bit(feat, header->adds_features);
158} 77}
159 78
160bool perf_header__has_feat(const struct perf_header *self, int feat) 79bool perf_header__has_feat(const struct perf_header *header, int feat)
161{ 80{
162 return test_bit(feat, self->adds_features); 81 return test_bit(feat, header->adds_features);
163} 82}
164 83
165static int do_write(int fd, const void *buf, size_t size) 84static int do_write(int fd, const void *buf, size_t size)
@@ -228,22 +147,22 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
228 return 0; 147 return 0;
229} 148}
230 149
231static int machine__write_buildid_table(struct machine *self, int fd) 150static int machine__write_buildid_table(struct machine *machine, int fd)
232{ 151{
233 int err; 152 int err;
234 u16 kmisc = PERF_RECORD_MISC_KERNEL, 153 u16 kmisc = PERF_RECORD_MISC_KERNEL,
235 umisc = PERF_RECORD_MISC_USER; 154 umisc = PERF_RECORD_MISC_USER;
236 155
237 if (!machine__is_host(self)) { 156 if (!machine__is_host(machine)) {
238 kmisc = PERF_RECORD_MISC_GUEST_KERNEL; 157 kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
239 umisc = PERF_RECORD_MISC_GUEST_USER; 158 umisc = PERF_RECORD_MISC_GUEST_USER;
240 } 159 }
241 160
242 err = __dsos__write_buildid_table(&self->kernel_dsos, self->pid, 161 err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
243 kmisc, fd); 162 kmisc, fd);
244 if (err == 0) 163 if (err == 0)
245 err = __dsos__write_buildid_table(&self->user_dsos, 164 err = __dsos__write_buildid_table(&machine->user_dsos,
246 self->pid, umisc, fd); 165 machine->pid, umisc, fd);
247 return err; 166 return err;
248} 167}
249 168
@@ -270,11 +189,15 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
270 const char *name, bool is_kallsyms) 189 const char *name, bool is_kallsyms)
271{ 190{
272 const size_t size = PATH_MAX; 191 const size_t size = PATH_MAX;
273 char *realname = realpath(name, NULL), 192 char *realname, *filename = malloc(size),
274 *filename = malloc(size),
275 *linkname = malloc(size), *targetname; 193 *linkname = malloc(size), *targetname;
276 int len, err = -1; 194 int len, err = -1;
277 195
196 if (is_kallsyms)
197 realname = (char *)name;
198 else
199 realname = realpath(name, NULL);
200
278 if (realname == NULL || filename == NULL || linkname == NULL) 201 if (realname == NULL || filename == NULL || linkname == NULL)
279 goto out_free; 202 goto out_free;
280 203
@@ -306,7 +229,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
306 if (symlink(targetname, linkname) == 0) 229 if (symlink(targetname, linkname) == 0)
307 err = 0; 230 err = 0;
308out_free: 231out_free:
309 free(realname); 232 if (!is_kallsyms)
233 free(realname);
310 free(filename); 234 free(filename);
311 free(linkname); 235 free(linkname);
312 return err; 236 return err;
@@ -361,12 +285,12 @@ out_free:
361 return err; 285 return err;
362} 286}
363 287
364static int dso__cache_build_id(struct dso *self, const char *debugdir) 288static int dso__cache_build_id(struct dso *dso, const char *debugdir)
365{ 289{
366 bool is_kallsyms = self->kernel && self->long_name[0] != '/'; 290 bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
367 291
368 return build_id_cache__add_b(self->build_id, sizeof(self->build_id), 292 return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
369 self->long_name, debugdir, is_kallsyms); 293 dso->long_name, debugdir, is_kallsyms);
370} 294}
371 295
372static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) 296static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
@@ -381,14 +305,14 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
381 return err; 305 return err;
382} 306}
383 307
384static int machine__cache_build_ids(struct machine *self, const char *debugdir) 308static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
385{ 309{
386 int ret = __dsos__cache_build_ids(&self->kernel_dsos, debugdir); 310 int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
387 ret |= __dsos__cache_build_ids(&self->user_dsos, debugdir); 311 ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
388 return ret; 312 return ret;
389} 313}
390 314
391static int perf_session__cache_build_ids(struct perf_session *self) 315static int perf_session__cache_build_ids(struct perf_session *session)
392{ 316{
393 struct rb_node *nd; 317 struct rb_node *nd;
394 int ret; 318 int ret;
@@ -399,28 +323,28 @@ static int perf_session__cache_build_ids(struct perf_session *self)
399 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) 323 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
400 return -1; 324 return -1;
401 325
402 ret = machine__cache_build_ids(&self->host_machine, debugdir); 326 ret = machine__cache_build_ids(&session->host_machine, debugdir);
403 327
404 for (nd = rb_first(&self->machines); nd; nd = rb_next(nd)) { 328 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
405 struct machine *pos = rb_entry(nd, struct machine, rb_node); 329 struct machine *pos = rb_entry(nd, struct machine, rb_node);
406 ret |= machine__cache_build_ids(pos, debugdir); 330 ret |= machine__cache_build_ids(pos, debugdir);
407 } 331 }
408 return ret ? -1 : 0; 332 return ret ? -1 : 0;
409} 333}
410 334
411static bool machine__read_build_ids(struct machine *self, bool with_hits) 335static bool machine__read_build_ids(struct machine *machine, bool with_hits)
412{ 336{
413 bool ret = __dsos__read_build_ids(&self->kernel_dsos, with_hits); 337 bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
414 ret |= __dsos__read_build_ids(&self->user_dsos, with_hits); 338 ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
415 return ret; 339 return ret;
416} 340}
417 341
418static bool perf_session__read_build_ids(struct perf_session *self, bool with_hits) 342static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
419{ 343{
420 struct rb_node *nd; 344 struct rb_node *nd;
421 bool ret = machine__read_build_ids(&self->host_machine, with_hits); 345 bool ret = machine__read_build_ids(&session->host_machine, with_hits);
422 346
423 for (nd = rb_first(&self->machines); nd; nd = rb_next(nd)) { 347 for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
424 struct machine *pos = rb_entry(nd, struct machine, rb_node); 348 struct machine *pos = rb_entry(nd, struct machine, rb_node);
425 ret |= machine__read_build_ids(pos, with_hits); 349 ret |= machine__read_build_ids(pos, with_hits);
426 } 350 }
@@ -428,7 +352,8 @@ static bool perf_session__read_build_ids(struct perf_session *self, bool with_hi
428 return ret; 352 return ret;
429} 353}
430 354
431static int perf_header__adds_write(struct perf_header *self, int fd) 355static int perf_header__adds_write(struct perf_header *header,
356 struct perf_evlist *evlist, int fd)
432{ 357{
433 int nr_sections; 358 int nr_sections;
434 struct perf_session *session; 359 struct perf_session *session;
@@ -437,13 +362,13 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
437 u64 sec_start; 362 u64 sec_start;
438 int idx = 0, err; 363 int idx = 0, err;
439 364
440 session = container_of(self, struct perf_session, header); 365 session = container_of(header, struct perf_session, header);
441 366
442 if (perf_header__has_feat(self, HEADER_BUILD_ID && 367 if (perf_header__has_feat(header, HEADER_BUILD_ID &&
443 !perf_session__read_build_ids(session, true))) 368 !perf_session__read_build_ids(session, true)))
444 perf_header__clear_feat(self, HEADER_BUILD_ID); 369 perf_header__clear_feat(header, HEADER_BUILD_ID);
445 370
446 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 371 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
447 if (!nr_sections) 372 if (!nr_sections)
448 return 0; 373 return 0;
449 374
@@ -453,28 +378,28 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
453 378
454 sec_size = sizeof(*feat_sec) * nr_sections; 379 sec_size = sizeof(*feat_sec) * nr_sections;
455 380
456 sec_start = self->data_offset + self->data_size; 381 sec_start = header->data_offset + header->data_size;
457 lseek(fd, sec_start + sec_size, SEEK_SET); 382 lseek(fd, sec_start + sec_size, SEEK_SET);
458 383
459 if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { 384 if (perf_header__has_feat(header, HEADER_TRACE_INFO)) {
460 struct perf_file_section *trace_sec; 385 struct perf_file_section *trace_sec;
461 386
462 trace_sec = &feat_sec[idx++]; 387 trace_sec = &feat_sec[idx++];
463 388
464 /* Write trace info */ 389 /* Write trace info */
465 trace_sec->offset = lseek(fd, 0, SEEK_CUR); 390 trace_sec->offset = lseek(fd, 0, SEEK_CUR);
466 read_tracing_data(fd, &evsel_list); 391 read_tracing_data(fd, &evlist->entries);
467 trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; 392 trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
468 } 393 }
469 394
470 if (perf_header__has_feat(self, HEADER_BUILD_ID)) { 395 if (perf_header__has_feat(header, HEADER_BUILD_ID)) {
471 struct perf_file_section *buildid_sec; 396 struct perf_file_section *buildid_sec;
472 397
473 buildid_sec = &feat_sec[idx++]; 398 buildid_sec = &feat_sec[idx++];
474 399
475 /* Write build-ids */ 400 /* Write build-ids */
476 buildid_sec->offset = lseek(fd, 0, SEEK_CUR); 401 buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
477 err = dsos__write_buildid_table(self, fd); 402 err = dsos__write_buildid_table(header, fd);
478 if (err < 0) { 403 if (err < 0) {
479 pr_debug("failed to write buildid table\n"); 404 pr_debug("failed to write buildid table\n");
480 goto out_free; 405 goto out_free;
@@ -513,32 +438,41 @@ int perf_header__write_pipe(int fd)
513 return 0; 438 return 0;
514} 439}
515 440
516int perf_header__write(struct perf_header *self, int fd, bool at_exit) 441int perf_session__write_header(struct perf_session *session,
442 struct perf_evlist *evlist,
443 int fd, bool at_exit)
517{ 444{
518 struct perf_file_header f_header; 445 struct perf_file_header f_header;
519 struct perf_file_attr f_attr; 446 struct perf_file_attr f_attr;
520 struct perf_header_attr *attr; 447 struct perf_header *header = &session->header;
521 int i, err; 448 struct perf_evsel *attr, *pair = NULL;
449 int err;
522 450
523 lseek(fd, sizeof(f_header), SEEK_SET); 451 lseek(fd, sizeof(f_header), SEEK_SET);
524 452
525 for (i = 0; i < self->attrs; i++) { 453 if (session->evlist != evlist)
526 attr = self->attr[i]; 454 pair = list_entry(session->evlist->entries.next, struct perf_evsel, node);
527 455
456 list_for_each_entry(attr, &evlist->entries, node) {
528 attr->id_offset = lseek(fd, 0, SEEK_CUR); 457 attr->id_offset = lseek(fd, 0, SEEK_CUR);
529 err = do_write(fd, attr->id, attr->ids * sizeof(u64)); 458 err = do_write(fd, attr->id, attr->ids * sizeof(u64));
530 if (err < 0) { 459 if (err < 0) {
460out_err_write:
531 pr_debug("failed to write perf header\n"); 461 pr_debug("failed to write perf header\n");
532 return err; 462 return err;
533 } 463 }
464 if (session->evlist != evlist) {
465 err = do_write(fd, pair->id, pair->ids * sizeof(u64));
466 if (err < 0)
467 goto out_err_write;
468 attr->ids += pair->ids;
469 pair = list_entry(pair->node.next, struct perf_evsel, node);
470 }
534 } 471 }
535 472
473 header->attr_offset = lseek(fd, 0, SEEK_CUR);
536 474
537 self->attr_offset = lseek(fd, 0, SEEK_CUR); 475 list_for_each_entry(attr, &evlist->entries, node) {
538
539 for (i = 0; i < self->attrs; i++) {
540 attr = self->attr[i];
541
542 f_attr = (struct perf_file_attr){ 476 f_attr = (struct perf_file_attr){
543 .attr = attr->attr, 477 .attr = attr->attr,
544 .ids = { 478 .ids = {
@@ -553,20 +487,20 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
553 } 487 }
554 } 488 }
555 489
556 self->event_offset = lseek(fd, 0, SEEK_CUR); 490 header->event_offset = lseek(fd, 0, SEEK_CUR);
557 self->event_size = event_count * sizeof(struct perf_trace_event_type); 491 header->event_size = event_count * sizeof(struct perf_trace_event_type);
558 if (events) { 492 if (events) {
559 err = do_write(fd, events, self->event_size); 493 err = do_write(fd, events, header->event_size);
560 if (err < 0) { 494 if (err < 0) {
561 pr_debug("failed to write perf header events\n"); 495 pr_debug("failed to write perf header events\n");
562 return err; 496 return err;
563 } 497 }
564 } 498 }
565 499
566 self->data_offset = lseek(fd, 0, SEEK_CUR); 500 header->data_offset = lseek(fd, 0, SEEK_CUR);
567 501
568 if (at_exit) { 502 if (at_exit) {
569 err = perf_header__adds_write(self, fd); 503 err = perf_header__adds_write(header, evlist, fd);
570 if (err < 0) 504 if (err < 0)
571 return err; 505 return err;
572 } 506 }
@@ -576,20 +510,20 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
576 .size = sizeof(f_header), 510 .size = sizeof(f_header),
577 .attr_size = sizeof(f_attr), 511 .attr_size = sizeof(f_attr),
578 .attrs = { 512 .attrs = {
579 .offset = self->attr_offset, 513 .offset = header->attr_offset,
580 .size = self->attrs * sizeof(f_attr), 514 .size = evlist->nr_entries * sizeof(f_attr),
581 }, 515 },
582 .data = { 516 .data = {
583 .offset = self->data_offset, 517 .offset = header->data_offset,
584 .size = self->data_size, 518 .size = header->data_size,
585 }, 519 },
586 .event_types = { 520 .event_types = {
587 .offset = self->event_offset, 521 .offset = header->event_offset,
588 .size = self->event_size, 522 .size = header->event_size,
589 }, 523 },
590 }; 524 };
591 525
592 memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); 526 memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
593 527
594 lseek(fd, 0, SEEK_SET); 528 lseek(fd, 0, SEEK_SET);
595 err = do_write(fd, &f_header, sizeof(f_header)); 529 err = do_write(fd, &f_header, sizeof(f_header));
@@ -597,26 +531,26 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
597 pr_debug("failed to write perf header\n"); 531 pr_debug("failed to write perf header\n");
598 return err; 532 return err;
599 } 533 }
600 lseek(fd, self->data_offset + self->data_size, SEEK_SET); 534 lseek(fd, header->data_offset + header->data_size, SEEK_SET);
601 535
602 self->frozen = 1; 536 header->frozen = 1;
603 return 0; 537 return 0;
604} 538}
605 539
606static int perf_header__getbuffer64(struct perf_header *self, 540static int perf_header__getbuffer64(struct perf_header *header,
607 int fd, void *buf, size_t size) 541 int fd, void *buf, size_t size)
608{ 542{
609 if (readn(fd, buf, size) <= 0) 543 if (readn(fd, buf, size) <= 0)
610 return -1; 544 return -1;
611 545
612 if (self->needs_swap) 546 if (header->needs_swap)
613 mem_bswap_64(buf, size); 547 mem_bswap_64(buf, size);
614 548
615 return 0; 549 return 0;
616} 550}
617 551
618int perf_header__process_sections(struct perf_header *self, int fd, 552int perf_header__process_sections(struct perf_header *header, int fd,
619 int (*process)(struct perf_file_section *self, 553 int (*process)(struct perf_file_section *section,
620 struct perf_header *ph, 554 struct perf_header *ph,
621 int feat, int fd)) 555 int feat, int fd))
622{ 556{
@@ -626,7 +560,7 @@ int perf_header__process_sections(struct perf_header *self, int fd,
626 int idx = 0; 560 int idx = 0;
627 int err = -1, feat = 1; 561 int err = -1, feat = 1;
628 562
629 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 563 nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
630 if (!nr_sections) 564 if (!nr_sections)
631 return 0; 565 return 0;
632 566
@@ -636,17 +570,17 @@ int perf_header__process_sections(struct perf_header *self, int fd,
636 570
637 sec_size = sizeof(*feat_sec) * nr_sections; 571 sec_size = sizeof(*feat_sec) * nr_sections;
638 572
639 lseek(fd, self->data_offset + self->data_size, SEEK_SET); 573 lseek(fd, header->data_offset + header->data_size, SEEK_SET);
640 574
641 if (perf_header__getbuffer64(self, fd, feat_sec, sec_size)) 575 if (perf_header__getbuffer64(header, fd, feat_sec, sec_size))
642 goto out_free; 576 goto out_free;
643 577
644 err = 0; 578 err = 0;
645 while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { 579 while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
646 if (perf_header__has_feat(self, feat)) { 580 if (perf_header__has_feat(header, feat)) {
647 struct perf_file_section *sec = &feat_sec[idx++]; 581 struct perf_file_section *sec = &feat_sec[idx++];
648 582
649 err = process(sec, self, feat, fd); 583 err = process(sec, header, feat, fd);
650 if (err < 0) 584 if (err < 0)
651 break; 585 break;
652 } 586 }
@@ -657,35 +591,35 @@ out_free:
657 return err; 591 return err;
658} 592}
659 593
660int perf_file_header__read(struct perf_file_header *self, 594int perf_file_header__read(struct perf_file_header *header,
661 struct perf_header *ph, int fd) 595 struct perf_header *ph, int fd)
662{ 596{
663 lseek(fd, 0, SEEK_SET); 597 lseek(fd, 0, SEEK_SET);
664 598
665 if (readn(fd, self, sizeof(*self)) <= 0 || 599 if (readn(fd, header, sizeof(*header)) <= 0 ||
666 memcmp(&self->magic, __perf_magic, sizeof(self->magic))) 600 memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
667 return -1; 601 return -1;
668 602
669 if (self->attr_size != sizeof(struct perf_file_attr)) { 603 if (header->attr_size != sizeof(struct perf_file_attr)) {
670 u64 attr_size = bswap_64(self->attr_size); 604 u64 attr_size = bswap_64(header->attr_size);
671 605
672 if (attr_size != sizeof(struct perf_file_attr)) 606 if (attr_size != sizeof(struct perf_file_attr))
673 return -1; 607 return -1;
674 608
675 mem_bswap_64(self, offsetof(struct perf_file_header, 609 mem_bswap_64(header, offsetof(struct perf_file_header,
676 adds_features)); 610 adds_features));
677 ph->needs_swap = true; 611 ph->needs_swap = true;
678 } 612 }
679 613
680 if (self->size != sizeof(*self)) { 614 if (header->size != sizeof(*header)) {
681 /* Support the previous format */ 615 /* Support the previous format */
682 if (self->size == offsetof(typeof(*self), adds_features)) 616 if (header->size == offsetof(typeof(*header), adds_features))
683 bitmap_zero(self->adds_features, HEADER_FEAT_BITS); 617 bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
684 else 618 else
685 return -1; 619 return -1;
686 } 620 }
687 621
688 memcpy(&ph->adds_features, &self->adds_features, 622 memcpy(&ph->adds_features, &header->adds_features,
689 sizeof(ph->adds_features)); 623 sizeof(ph->adds_features));
690 /* 624 /*
691 * FIXME: hack that assumes that if we need swap the perf.data file 625 * FIXME: hack that assumes that if we need swap the perf.data file
@@ -699,10 +633,10 @@ int perf_file_header__read(struct perf_file_header *self,
699 perf_header__set_feat(ph, HEADER_BUILD_ID); 633 perf_header__set_feat(ph, HEADER_BUILD_ID);
700 } 634 }
701 635
702 ph->event_offset = self->event_types.offset; 636 ph->event_offset = header->event_types.offset;
703 ph->event_size = self->event_types.size; 637 ph->event_size = header->event_types.size;
704 ph->data_offset = self->data.offset; 638 ph->data_offset = header->data.offset;
705 ph->data_size = self->data.size; 639 ph->data_size = header->data.size;
706 return 0; 640 return 0;
707} 641}
708 642
@@ -761,11 +695,10 @@ out:
761 return err; 695 return err;
762} 696}
763 697
764static int perf_header__read_build_ids(struct perf_header *self, 698static int perf_header__read_build_ids(struct perf_header *header,
765 int input, u64 offset, u64 size) 699 int input, u64 offset, u64 size)
766{ 700{
767 struct perf_session *session = container_of(self, 701 struct perf_session *session = container_of(header, struct perf_session, header);
768 struct perf_session, header);
769 struct build_id_event bev; 702 struct build_id_event bev;
770 char filename[PATH_MAX]; 703 char filename[PATH_MAX];
771 u64 limit = offset + size; 704 u64 limit = offset + size;
@@ -777,7 +710,7 @@ static int perf_header__read_build_ids(struct perf_header *self,
777 if (read(input, &bev, sizeof(bev)) != sizeof(bev)) 710 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
778 goto out; 711 goto out;
779 712
780 if (self->needs_swap) 713 if (header->needs_swap)
781 perf_event_header__bswap(&bev.header); 714 perf_event_header__bswap(&bev.header);
782 715
783 len = bev.header.size - sizeof(bev); 716 len = bev.header.size - sizeof(bev);
@@ -793,13 +726,13 @@ out:
793 return err; 726 return err;
794} 727}
795 728
796static int perf_file_section__process(struct perf_file_section *self, 729static int perf_file_section__process(struct perf_file_section *section,
797 struct perf_header *ph, 730 struct perf_header *ph,
798 int feat, int fd) 731 int feat, int fd)
799{ 732{
800 if (lseek(fd, self->offset, SEEK_SET) == (off_t)-1) { 733 if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
801 pr_debug("Failed to lseek to %" PRIu64 " offset for feature " 734 pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
802 "%d, continuing...\n", self->offset, feat); 735 "%d, continuing...\n", section->offset, feat);
803 return 0; 736 return 0;
804 } 737 }
805 738
@@ -809,7 +742,7 @@ static int perf_file_section__process(struct perf_file_section *self,
809 break; 742 break;
810 743
811 case HEADER_BUILD_ID: 744 case HEADER_BUILD_ID:
812 if (perf_header__read_build_ids(ph, fd, self->offset, self->size)) 745 if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
813 pr_debug("Failed to read buildids, continuing...\n"); 746 pr_debug("Failed to read buildids, continuing...\n");
814 break; 747 break;
815 default: 748 default:
@@ -819,21 +752,21 @@ static int perf_file_section__process(struct perf_file_section *self,
819 return 0; 752 return 0;
820} 753}
821 754
822static int perf_file_header__read_pipe(struct perf_pipe_file_header *self, 755static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
823 struct perf_header *ph, int fd, 756 struct perf_header *ph, int fd,
824 bool repipe) 757 bool repipe)
825{ 758{
826 if (readn(fd, self, sizeof(*self)) <= 0 || 759 if (readn(fd, header, sizeof(*header)) <= 0 ||
827 memcmp(&self->magic, __perf_magic, sizeof(self->magic))) 760 memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
828 return -1; 761 return -1;
829 762
830 if (repipe && do_write(STDOUT_FILENO, self, sizeof(*self)) < 0) 763 if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
831 return -1; 764 return -1;
832 765
833 if (self->size != sizeof(*self)) { 766 if (header->size != sizeof(*header)) {
834 u64 size = bswap_64(self->size); 767 u64 size = bswap_64(header->size);
835 768
836 if (size != sizeof(*self)) 769 if (size != sizeof(*header))
837 return -1; 770 return -1;
838 771
839 ph->needs_swap = true; 772 ph->needs_swap = true;
@@ -844,10 +777,10 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *self,
844 777
845static int perf_header__read_pipe(struct perf_session *session, int fd) 778static int perf_header__read_pipe(struct perf_session *session, int fd)
846{ 779{
847 struct perf_header *self = &session->header; 780 struct perf_header *header = &session->header;
848 struct perf_pipe_file_header f_header; 781 struct perf_pipe_file_header f_header;
849 782
850 if (perf_file_header__read_pipe(&f_header, self, fd, 783 if (perf_file_header__read_pipe(&f_header, header, fd,
851 session->repipe) < 0) { 784 session->repipe) < 0) {
852 pr_debug("incompatible file format\n"); 785 pr_debug("incompatible file format\n");
853 return -EINVAL; 786 return -EINVAL;
@@ -858,18 +791,22 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
858 return 0; 791 return 0;
859} 792}
860 793
861int perf_header__read(struct perf_session *session, int fd) 794int perf_session__read_header(struct perf_session *session, int fd)
862{ 795{
863 struct perf_header *self = &session->header; 796 struct perf_header *header = &session->header;
864 struct perf_file_header f_header; 797 struct perf_file_header f_header;
865 struct perf_file_attr f_attr; 798 struct perf_file_attr f_attr;
866 u64 f_id; 799 u64 f_id;
867 int nr_attrs, nr_ids, i, j; 800 int nr_attrs, nr_ids, i, j;
868 801
802 session->evlist = perf_evlist__new(NULL, NULL);
803 if (session->evlist == NULL)
804 return -ENOMEM;
805
869 if (session->fd_pipe) 806 if (session->fd_pipe)
870 return perf_header__read_pipe(session, fd); 807 return perf_header__read_pipe(session, fd);
871 808
872 if (perf_file_header__read(&f_header, self, fd) < 0) { 809 if (perf_file_header__read(&f_header, header, fd) < 0) {
873 pr_debug("incompatible file format\n"); 810 pr_debug("incompatible file format\n");
874 return -EINVAL; 811 return -EINVAL;
875 } 812 }
@@ -878,33 +815,39 @@ int perf_header__read(struct perf_session *session, int fd)
878 lseek(fd, f_header.attrs.offset, SEEK_SET); 815 lseek(fd, f_header.attrs.offset, SEEK_SET);
879 816
880 for (i = 0; i < nr_attrs; i++) { 817 for (i = 0; i < nr_attrs; i++) {
881 struct perf_header_attr *attr; 818 struct perf_evsel *evsel;
882 off_t tmp; 819 off_t tmp;
883 820
884 if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr))) 821 if (perf_header__getbuffer64(header, fd, &f_attr, sizeof(f_attr)))
885 goto out_errno; 822 goto out_errno;
886 823
887 tmp = lseek(fd, 0, SEEK_CUR); 824 tmp = lseek(fd, 0, SEEK_CUR);
825 evsel = perf_evsel__new(&f_attr.attr, i);
888 826
889 attr = perf_header_attr__new(&f_attr.attr); 827 if (evsel == NULL)
890 if (attr == NULL) 828 goto out_delete_evlist;
891 return -ENOMEM; 829 /*
830 * Do it before so that if perf_evsel__alloc_id fails, this
831 * entry gets purged too at perf_evlist__delete().
832 */
833 perf_evlist__add(session->evlist, evsel);
892 834
893 nr_ids = f_attr.ids.size / sizeof(u64); 835 nr_ids = f_attr.ids.size / sizeof(u64);
836 /*
837 * We don't have the cpu and thread maps on the header, so
838 * for allocating the perf_sample_id table we fake 1 cpu and
839 * hattr->ids threads.
840 */
841 if (perf_evsel__alloc_id(evsel, 1, nr_ids))
842 goto out_delete_evlist;
843
894 lseek(fd, f_attr.ids.offset, SEEK_SET); 844 lseek(fd, f_attr.ids.offset, SEEK_SET);
895 845
896 for (j = 0; j < nr_ids; j++) { 846 for (j = 0; j < nr_ids; j++) {
897 if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id))) 847 if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
898 goto out_errno; 848 goto out_errno;
899 849
900 if (perf_header_attr__add_id(attr, f_id) < 0) { 850 perf_evlist__id_add(session->evlist, evsel, 0, j, f_id);
901 perf_header_attr__delete(attr);
902 return -ENOMEM;
903 }
904 }
905 if (perf_header__add_attr(self, attr) < 0) {
906 perf_header_attr__delete(attr);
907 return -ENOMEM;
908 } 851 }
909 852
910 lseek(fd, tmp, SEEK_SET); 853 lseek(fd, tmp, SEEK_SET);
@@ -915,93 +858,63 @@ int perf_header__read(struct perf_session *session, int fd)
915 events = malloc(f_header.event_types.size); 858 events = malloc(f_header.event_types.size);
916 if (events == NULL) 859 if (events == NULL)
917 return -ENOMEM; 860 return -ENOMEM;
918 if (perf_header__getbuffer64(self, fd, events, 861 if (perf_header__getbuffer64(header, fd, events,
919 f_header.event_types.size)) 862 f_header.event_types.size))
920 goto out_errno; 863 goto out_errno;
921 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); 864 event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
922 } 865 }
923 866
924 perf_header__process_sections(self, fd, perf_file_section__process); 867 perf_header__process_sections(header, fd, perf_file_section__process);
925 868
926 lseek(fd, self->data_offset, SEEK_SET); 869 lseek(fd, header->data_offset, SEEK_SET);
927 870
928 self->frozen = 1; 871 header->frozen = 1;
929 return 0; 872 return 0;
930out_errno: 873out_errno:
931 return -errno; 874 return -errno;
875
876out_delete_evlist:
877 perf_evlist__delete(session->evlist);
878 session->evlist = NULL;
879 return -ENOMEM;
932} 880}
933 881
934u64 perf_header__sample_type(struct perf_header *header) 882u64 perf_evlist__sample_type(struct perf_evlist *evlist)
935{ 883{
884 struct perf_evsel *pos;
936 u64 type = 0; 885 u64 type = 0;
937 int i;
938
939 for (i = 0; i < header->attrs; i++) {
940 struct perf_header_attr *attr = header->attr[i];
941 886
887 list_for_each_entry(pos, &evlist->entries, node) {
942 if (!type) 888 if (!type)
943 type = attr->attr.sample_type; 889 type = pos->attr.sample_type;
944 else if (type != attr->attr.sample_type) 890 else if (type != pos->attr.sample_type)
945 die("non matching sample_type"); 891 die("non matching sample_type");
946 } 892 }
947 893
948 return type; 894 return type;
949} 895}
950 896
951bool perf_header__sample_id_all(const struct perf_header *header) 897bool perf_evlist__sample_id_all(const struct perf_evlist *evlist)
952{ 898{
953 bool value = false, first = true; 899 bool value = false, first = true;
954 int i; 900 struct perf_evsel *pos;
955
956 for (i = 0; i < header->attrs; i++) {
957 struct perf_header_attr *attr = header->attr[i];
958 901
902 list_for_each_entry(pos, &evlist->entries, node) {
959 if (first) { 903 if (first) {
960 value = attr->attr.sample_id_all; 904 value = pos->attr.sample_id_all;
961 first = false; 905 first = false;
962 } else if (value != attr->attr.sample_id_all) 906 } else if (value != pos->attr.sample_id_all)
963 die("non matching sample_id_all"); 907 die("non matching sample_id_all");
964 } 908 }
965 909
966 return value; 910 return value;
967} 911}
968 912
969struct perf_event_attr * 913int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
970perf_header__find_attr(u64 id, struct perf_header *header) 914 perf_event__handler_t process,
971{ 915 struct perf_session *session)
972 int i;
973
974 /*
975 * We set id to -1 if the data file doesn't contain sample
976 * ids. This can happen when the data file contains one type
977 * of event and in that case, the header can still store the
978 * event attribute information. Check for this and avoid
979 * walking through the entire list of ids which may be large.
980 */
981 if (id == -1ULL) {
982 if (header->attrs > 0)
983 return &header->attr[0]->attr;
984 return NULL;
985 }
986
987 for (i = 0; i < header->attrs; i++) {
988 struct perf_header_attr *attr = header->attr[i];
989 int j;
990
991 for (j = 0; j < attr->ids; j++) {
992 if (attr->id[j] == id)
993 return &attr->attr;
994 }
995 }
996
997 return NULL;
998}
999
1000int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
1001 event__handler_t process,
1002 struct perf_session *session)
1003{ 916{
1004 event_t *ev; 917 union perf_event *ev;
1005 size_t size; 918 size_t size;
1006 int err; 919 int err;
1007 920
@@ -1028,17 +941,15 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
1028 return err; 941 return err;
1029} 942}
1030 943
1031int event__synthesize_attrs(struct perf_header *self, event__handler_t process, 944int perf_session__synthesize_attrs(struct perf_session *session,
1032 struct perf_session *session) 945 perf_event__handler_t process)
1033{ 946{
1034 struct perf_header_attr *attr; 947 struct perf_evsel *attr;
1035 int i, err = 0; 948 int err = 0;
1036
1037 for (i = 0; i < self->attrs; i++) {
1038 attr = self->attr[i];
1039 949
1040 err = event__synthesize_attr(&attr->attr, attr->ids, attr->id, 950 list_for_each_entry(attr, &session->evlist->entries, node) {
1041 process, session); 951 err = perf_event__synthesize_attr(&attr->attr, attr->ids,
952 attr->id, process, session);
1042 if (err) { 953 if (err) {
1043 pr_debug("failed to create perf header attribute\n"); 954 pr_debug("failed to create perf header attribute\n");
1044 return err; 955 return err;
@@ -1048,29 +959,39 @@ int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
1048 return err; 959 return err;
1049} 960}
1050 961
1051int event__process_attr(event_t *self, struct perf_session *session) 962int perf_event__process_attr(union perf_event *event,
963 struct perf_session *session)
1052{ 964{
1053 struct perf_header_attr *attr;
1054 unsigned int i, ids, n_ids; 965 unsigned int i, ids, n_ids;
966 struct perf_evsel *evsel;
967
968 if (session->evlist == NULL) {
969 session->evlist = perf_evlist__new(NULL, NULL);
970 if (session->evlist == NULL)
971 return -ENOMEM;
972 }
1055 973
1056 attr = perf_header_attr__new(&self->attr.attr); 974 evsel = perf_evsel__new(&event->attr.attr,
1057 if (attr == NULL) 975 session->evlist->nr_entries);
976 if (evsel == NULL)
1058 return -ENOMEM; 977 return -ENOMEM;
1059 978
1060 ids = self->header.size; 979 perf_evlist__add(session->evlist, evsel);
1061 ids -= (void *)&self->attr.id - (void *)self; 980
981 ids = event->header.size;
982 ids -= (void *)&event->attr.id - (void *)event;
1062 n_ids = ids / sizeof(u64); 983 n_ids = ids / sizeof(u64);
984 /*
985 * We don't have the cpu and thread maps on the header, so
986 * for allocating the perf_sample_id table we fake 1 cpu and
987 * hattr->ids threads.
988 */
989 if (perf_evsel__alloc_id(evsel, 1, n_ids))
990 return -ENOMEM;
1063 991
1064 for (i = 0; i < n_ids; i++) { 992 for (i = 0; i < n_ids; i++) {
1065 if (perf_header_attr__add_id(attr, self->attr.id[i]) < 0) { 993 perf_evlist__id_add(session->evlist, evsel, 0, i,
1066 perf_header_attr__delete(attr); 994 event->attr.id[i]);
1067 return -ENOMEM;
1068 }
1069 }
1070
1071 if (perf_header__add_attr(&session->header, attr) < 0) {
1072 perf_header_attr__delete(attr);
1073 return -ENOMEM;
1074 } 995 }
1075 996
1076 perf_session__update_sample_type(session); 997 perf_session__update_sample_type(session);
@@ -1078,11 +999,11 @@ int event__process_attr(event_t *self, struct perf_session *session)
1078 return 0; 999 return 0;
1079} 1000}
1080 1001
1081int event__synthesize_event_type(u64 event_id, char *name, 1002int perf_event__synthesize_event_type(u64 event_id, char *name,
1082 event__handler_t process, 1003 perf_event__handler_t process,
1083 struct perf_session *session) 1004 struct perf_session *session)
1084{ 1005{
1085 event_t ev; 1006 union perf_event ev;
1086 size_t size = 0; 1007 size_t size = 0;
1087 int err = 0; 1008 int err = 0;
1088 1009
@@ -1103,8 +1024,8 @@ int event__synthesize_event_type(u64 event_id, char *name,
1103 return err; 1024 return err;
1104} 1025}
1105 1026
1106int event__synthesize_event_types(event__handler_t process, 1027int perf_event__synthesize_event_types(perf_event__handler_t process,
1107 struct perf_session *session) 1028 struct perf_session *session)
1108{ 1029{
1109 struct perf_trace_event_type *type; 1030 struct perf_trace_event_type *type;
1110 int i, err = 0; 1031 int i, err = 0;
@@ -1112,8 +1033,9 @@ int event__synthesize_event_types(event__handler_t process,
1112 for (i = 0; i < event_count; i++) { 1033 for (i = 0; i < event_count; i++) {
1113 type = &events[i]; 1034 type = &events[i];
1114 1035
1115 err = event__synthesize_event_type(type->event_id, type->name, 1036 err = perf_event__synthesize_event_type(type->event_id,
1116 process, session); 1037 type->name, process,
1038 session);
1117 if (err) { 1039 if (err) {
1118 pr_debug("failed to create perf header event type\n"); 1040 pr_debug("failed to create perf header event type\n");
1119 return err; 1041 return err;
@@ -1123,28 +1045,28 @@ int event__synthesize_event_types(event__handler_t process,
1123 return err; 1045 return err;
1124} 1046}
1125 1047
1126int event__process_event_type(event_t *self, 1048int perf_event__process_event_type(union perf_event *event,
1127 struct perf_session *session __unused) 1049 struct perf_session *session __unused)
1128{ 1050{
1129 if (perf_header__push_event(self->event_type.event_type.event_id, 1051 if (perf_header__push_event(event->event_type.event_type.event_id,
1130 self->event_type.event_type.name) < 0) 1052 event->event_type.event_type.name) < 0)
1131 return -ENOMEM; 1053 return -ENOMEM;
1132 1054
1133 return 0; 1055 return 0;
1134} 1056}
1135 1057
1136int event__synthesize_tracing_data(int fd, struct list_head *pattrs, 1058int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
1137 event__handler_t process, 1059 perf_event__handler_t process,
1138 struct perf_session *session __unused) 1060 struct perf_session *session __unused)
1139{ 1061{
1140 event_t ev; 1062 union perf_event ev;
1141 ssize_t size = 0, aligned_size = 0, padding; 1063 ssize_t size = 0, aligned_size = 0, padding;
1142 int err = 0; 1064 int err __used = 0;
1143 1065
1144 memset(&ev, 0, sizeof(ev)); 1066 memset(&ev, 0, sizeof(ev));
1145 1067
1146 ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; 1068 ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
1147 size = read_tracing_data_size(fd, pattrs); 1069 size = read_tracing_data_size(fd, &evlist->entries);
1148 if (size <= 0) 1070 if (size <= 0)
1149 return size; 1071 return size;
1150 aligned_size = ALIGN(size, sizeof(u64)); 1072 aligned_size = ALIGN(size, sizeof(u64));
@@ -1154,16 +1076,16 @@ int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
1154 1076
1155 process(&ev, NULL, session); 1077 process(&ev, NULL, session);
1156 1078
1157 err = read_tracing_data(fd, pattrs); 1079 err = read_tracing_data(fd, &evlist->entries);
1158 write_padded(fd, NULL, 0, padding); 1080 write_padded(fd, NULL, 0, padding);
1159 1081
1160 return aligned_size; 1082 return aligned_size;
1161} 1083}
1162 1084
1163int event__process_tracing_data(event_t *self, 1085int perf_event__process_tracing_data(union perf_event *event,
1164 struct perf_session *session) 1086 struct perf_session *session)
1165{ 1087{
1166 ssize_t size_read, padding, size = self->tracing_data.size; 1088 ssize_t size_read, padding, size = event->tracing_data.size;
1167 off_t offset = lseek(session->fd, 0, SEEK_CUR); 1089 off_t offset = lseek(session->fd, 0, SEEK_CUR);
1168 char buf[BUFSIZ]; 1090 char buf[BUFSIZ];
1169 1091
@@ -1189,12 +1111,12 @@ int event__process_tracing_data(event_t *self,
1189 return size_read + padding; 1111 return size_read + padding;
1190} 1112}
1191 1113
1192int event__synthesize_build_id(struct dso *pos, u16 misc, 1114int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
1193 event__handler_t process, 1115 perf_event__handler_t process,
1194 struct machine *machine, 1116 struct machine *machine,
1195 struct perf_session *session) 1117 struct perf_session *session)
1196{ 1118{
1197 event_t ev; 1119 union perf_event ev;
1198 size_t len; 1120 size_t len;
1199 int err = 0; 1121 int err = 0;
1200 1122
@@ -1217,11 +1139,11 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
1217 return err; 1139 return err;
1218} 1140}
1219 1141
1220int event__process_build_id(event_t *self, 1142int perf_event__process_build_id(union perf_event *event,
1221 struct perf_session *session) 1143 struct perf_session *session)
1222{ 1144{
1223 __event_process_build_id(&self->build_id, 1145 __event_process_build_id(&event->build_id,
1224 self->build_id.filename, 1146 event->build_id.filename,
1225 session); 1147 session);
1226 return 0; 1148 return 0;
1227} 1149}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 33f16be7b72f..456661d7f10e 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -9,13 +9,6 @@
9 9
10#include <linux/bitmap.h> 10#include <linux/bitmap.h>
11 11
12struct perf_header_attr {
13 struct perf_event_attr attr;
14 int ids, size;
15 u64 *id;
16 off_t id_offset;
17};
18
19enum { 12enum {
20 HEADER_TRACE_INFO = 1, 13 HEADER_TRACE_INFO = 1,
21 HEADER_BUILD_ID, 14 HEADER_BUILD_ID,
@@ -46,14 +39,12 @@ struct perf_pipe_file_header {
46 39
47struct perf_header; 40struct perf_header;
48 41
49int perf_file_header__read(struct perf_file_header *self, 42int perf_file_header__read(struct perf_file_header *header,
50 struct perf_header *ph, int fd); 43 struct perf_header *ph, int fd);
51 44
52struct perf_header { 45struct perf_header {
53 int frozen; 46 int frozen;
54 int attrs, size;
55 bool needs_swap; 47 bool needs_swap;
56 struct perf_header_attr **attr;
57 s64 attr_offset; 48 s64 attr_offset;
58 u64 data_offset; 49 u64 data_offset;
59 u64 data_size; 50 u64 data_size;
@@ -62,34 +53,25 @@ struct perf_header {
62 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); 53 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
63}; 54};
64 55
65int perf_header__init(struct perf_header *self); 56struct perf_evlist;
66void perf_header__exit(struct perf_header *self);
67 57
68int perf_header__read(struct perf_session *session, int fd); 58int perf_session__read_header(struct perf_session *session, int fd);
69int perf_header__write(struct perf_header *self, int fd, bool at_exit); 59int perf_session__write_header(struct perf_session *session,
60 struct perf_evlist *evlist,
61 int fd, bool at_exit);
70int perf_header__write_pipe(int fd); 62int perf_header__write_pipe(int fd);
71 63
72int perf_header__add_attr(struct perf_header *self,
73 struct perf_header_attr *attr);
74
75int perf_header__push_event(u64 id, const char *name); 64int perf_header__push_event(u64 id, const char *name);
76char *perf_header__find_event(u64 id); 65char *perf_header__find_event(u64 id);
77 66
78struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); 67u64 perf_evlist__sample_type(struct perf_evlist *evlist);
79void perf_header_attr__delete(struct perf_header_attr *self); 68bool perf_evlist__sample_id_all(const struct perf_evlist *evlist);
69void perf_header__set_feat(struct perf_header *header, int feat);
70void perf_header__clear_feat(struct perf_header *header, int feat);
71bool perf_header__has_feat(const struct perf_header *header, int feat);
80 72
81int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); 73int perf_header__process_sections(struct perf_header *header, int fd,
82 74 int (*process)(struct perf_file_section *section,
83u64 perf_header__sample_type(struct perf_header *header);
84bool perf_header__sample_id_all(const struct perf_header *header);
85struct perf_event_attr *
86perf_header__find_attr(u64 id, struct perf_header *header);
87void perf_header__set_feat(struct perf_header *self, int feat);
88void perf_header__clear_feat(struct perf_header *self, int feat);
89bool perf_header__has_feat(const struct perf_header *self, int feat);
90
91int perf_header__process_sections(struct perf_header *self, int fd,
92 int (*process)(struct perf_file_section *self,
93 struct perf_header *ph, 75 struct perf_header *ph,
94 int feat, int fd)); 76 int feat, int fd));
95 77
@@ -97,32 +79,31 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
97 const char *name, bool is_kallsyms); 79 const char *name, bool is_kallsyms);
98int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); 80int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
99 81
100int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id, 82int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
101 event__handler_t process, 83 perf_event__handler_t process,
102 struct perf_session *session);
103int event__synthesize_attrs(struct perf_header *self,
104 event__handler_t process,
105 struct perf_session *session);
106int event__process_attr(event_t *self, struct perf_session *session);
107
108int event__synthesize_event_type(u64 event_id, char *name,
109 event__handler_t process,
110 struct perf_session *session);
111int event__synthesize_event_types(event__handler_t process,
112 struct perf_session *session);
113int event__process_event_type(event_t *self,
114 struct perf_session *session);
115
116int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
117 event__handler_t process,
118 struct perf_session *session);
119int event__process_tracing_data(event_t *self,
120 struct perf_session *session); 84 struct perf_session *session);
85int perf_session__synthesize_attrs(struct perf_session *session,
86 perf_event__handler_t process);
87int perf_event__process_attr(union perf_event *event, struct perf_session *session);
88
89int perf_event__synthesize_event_type(u64 event_id, char *name,
90 perf_event__handler_t process,
91 struct perf_session *session);
92int perf_event__synthesize_event_types(perf_event__handler_t process,
93 struct perf_session *session);
94int perf_event__process_event_type(union perf_event *event,
95 struct perf_session *session);
121 96
122int event__synthesize_build_id(struct dso *pos, u16 misc, 97int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
123 event__handler_t process, 98 perf_event__handler_t process,
124 struct machine *machine, 99 struct perf_session *session);
125 struct perf_session *session); 100int perf_event__process_tracing_data(union perf_event *event,
126int event__process_build_id(event_t *self, struct perf_session *session); 101 struct perf_session *session);
127 102
103int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
104 perf_event__handler_t process,
105 struct machine *machine,
106 struct perf_session *session);
107int perf_event__process_build_id(union perf_event *event,
108 struct perf_session *session);
128#endif /* __PERF_HEADER_H */ 109#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 32f4f1f2f6e4..627a02e03c57 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1,3 +1,4 @@
1#include "annotate.h"
1#include "util.h" 2#include "util.h"
2#include "build-id.h" 3#include "build-id.h"
3#include "hist.h" 4#include "hist.h"
@@ -49,6 +50,15 @@ static void hists__calc_col_len(struct hists *self, struct hist_entry *h)
49 50
50 if (h->ms.sym) 51 if (h->ms.sym)
51 hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen); 52 hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen);
53 else {
54 const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
55
56 if (hists__col_len(self, HISTC_DSO) < unresolved_col_width &&
57 !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
58 !symbol_conf.dso_list)
59 hists__set_col_len(self, HISTC_DSO,
60 unresolved_col_width);
61 }
52 62
53 len = thread__comm_len(h->thread); 63 len = thread__comm_len(h->thread);
54 if (hists__new_col_len(self, HISTC_COMM, len)) 64 if (hists__new_col_len(self, HISTC_COMM, len))
@@ -211,7 +221,9 @@ void hist_entry__free(struct hist_entry *he)
211 * collapse the histogram 221 * collapse the histogram
212 */ 222 */
213 223
214static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he) 224static bool hists__collapse_insert_entry(struct hists *self,
225 struct rb_root *root,
226 struct hist_entry *he)
215{ 227{
216 struct rb_node **p = &root->rb_node; 228 struct rb_node **p = &root->rb_node;
217 struct rb_node *parent = NULL; 229 struct rb_node *parent = NULL;
@@ -226,8 +238,11 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
226 238
227 if (!cmp) { 239 if (!cmp) {
228 iter->period += he->period; 240 iter->period += he->period;
229 if (symbol_conf.use_callchain) 241 if (symbol_conf.use_callchain) {
230 callchain_merge(iter->callchain, he->callchain); 242 callchain_cursor_reset(&self->callchain_cursor);
243 callchain_merge(&self->callchain_cursor, iter->callchain,
244 he->callchain);
245 }
231 hist_entry__free(he); 246 hist_entry__free(he);
232 return false; 247 return false;
233 } 248 }
@@ -262,7 +277,7 @@ void hists__collapse_resort(struct hists *self)
262 next = rb_next(&n->rb_node); 277 next = rb_next(&n->rb_node);
263 278
264 rb_erase(&n->rb_node, &self->entries); 279 rb_erase(&n->rb_node, &self->entries);
265 if (collapse__insert_entry(&tmp, n)) 280 if (hists__collapse_insert_entry(self, &tmp, n))
266 hists__inc_nr_entries(self, n); 281 hists__inc_nr_entries(self, n);
267 } 282 }
268 283
@@ -425,7 +440,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
425 u64 cumul; 440 u64 cumul;
426 441
427 child = rb_entry(node, struct callchain_node, rb_node); 442 child = rb_entry(node, struct callchain_node, rb_node);
428 cumul = cumul_hits(child); 443 cumul = callchain_cumul_hits(child);
429 remaining -= cumul; 444 remaining -= cumul;
430 445
431 /* 446 /*
@@ -585,6 +600,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
585{ 600{
586 struct sort_entry *se; 601 struct sort_entry *se;
587 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; 602 u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
603 u64 nr_events;
588 const char *sep = symbol_conf.field_sep; 604 const char *sep = symbol_conf.field_sep;
589 int ret; 605 int ret;
590 606
@@ -593,6 +609,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
593 609
594 if (pair_hists) { 610 if (pair_hists) {
595 period = self->pair ? self->pair->period : 0; 611 period = self->pair ? self->pair->period : 0;
612 nr_events = self->pair ? self->pair->nr_events : 0;
596 total = pair_hists->stats.total_period; 613 total = pair_hists->stats.total_period;
597 period_sys = self->pair ? self->pair->period_sys : 0; 614 period_sys = self->pair ? self->pair->period_sys : 0;
598 period_us = self->pair ? self->pair->period_us : 0; 615 period_us = self->pair ? self->pair->period_us : 0;
@@ -600,6 +617,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
600 period_guest_us = self->pair ? self->pair->period_guest_us : 0; 617 period_guest_us = self->pair ? self->pair->period_guest_us : 0;
601 } else { 618 } else {
602 period = self->period; 619 period = self->period;
620 nr_events = self->nr_events;
603 total = session_total; 621 total = session_total;
604 period_sys = self->period_sys; 622 period_sys = self->period_sys;
605 period_us = self->period_us; 623 period_us = self->period_us;
@@ -640,9 +658,9 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
640 658
641 if (symbol_conf.show_nr_samples) { 659 if (symbol_conf.show_nr_samples) {
642 if (sep) 660 if (sep)
643 ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); 661 ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events);
644 else 662 else
645 ret += snprintf(s + ret, size - ret, "%11" PRIu64, period); 663 ret += snprintf(s + ret, size - ret, "%11" PRIu64, nr_events);
646 } 664 }
647 665
648 if (pair_hists) { 666 if (pair_hists) {
@@ -944,225 +962,14 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
944 } 962 }
945} 963}
946 964
947static int symbol__alloc_hist(struct symbol *self) 965int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
948{
949 struct sym_priv *priv = symbol__priv(self);
950 const int size = (sizeof(*priv->hist) +
951 (self->end - self->start) * sizeof(u64));
952
953 priv->hist = zalloc(size);
954 return priv->hist == NULL ? -1 : 0;
955}
956
957int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip)
958{
959 unsigned int sym_size, offset;
960 struct symbol *sym = self->ms.sym;
961 struct sym_priv *priv;
962 struct sym_hist *h;
963
964 if (!sym || !self->ms.map)
965 return 0;
966
967 priv = symbol__priv(sym);
968 if (priv->hist == NULL && symbol__alloc_hist(sym) < 0)
969 return -ENOMEM;
970
971 sym_size = sym->end - sym->start;
972 offset = ip - sym->start;
973
974 pr_debug3("%s: ip=%#" PRIx64 "\n", __func__, self->ms.map->unmap_ip(self->ms.map, ip));
975
976 if (offset >= sym_size)
977 return 0;
978
979 h = priv->hist;
980 h->sum++;
981 h->ip[offset]++;
982
983 pr_debug3("%#" PRIx64 " %s: period++ [ip: %#" PRIx64 ", %#" PRIx64
984 "] => %" PRIu64 "\n", self->ms.sym->start, self->ms.sym->name,
985 ip, ip - self->ms.sym->start, h->ip[offset]);
986 return 0;
987}
988
989static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
990{
991 struct objdump_line *self = malloc(sizeof(*self) + privsize);
992
993 if (self != NULL) {
994 self->offset = offset;
995 self->line = line;
996 }
997
998 return self;
999}
1000
1001void objdump_line__free(struct objdump_line *self)
1002{
1003 free(self->line);
1004 free(self);
1005}
1006
1007static void objdump__add_line(struct list_head *head, struct objdump_line *line)
1008{
1009 list_add_tail(&line->node, head);
1010}
1011
1012struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
1013 struct objdump_line *pos)
1014{
1015 list_for_each_entry_continue(pos, head, node)
1016 if (pos->offset >= 0)
1017 return pos;
1018
1019 return NULL;
1020}
1021
1022static int hist_entry__parse_objdump_line(struct hist_entry *self, FILE *file,
1023 struct list_head *head, size_t privsize)
1024{ 966{
1025 struct symbol *sym = self->ms.sym; 967 return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
1026 struct objdump_line *objdump_line;
1027 char *line = NULL, *tmp, *tmp2, *c;
1028 size_t line_len;
1029 s64 line_ip, offset = -1;
1030
1031 if (getline(&line, &line_len, file) < 0)
1032 return -1;
1033
1034 if (!line)
1035 return -1;
1036
1037 while (line_len != 0 && isspace(line[line_len - 1]))
1038 line[--line_len] = '\0';
1039
1040 c = strchr(line, '\n');
1041 if (c)
1042 *c = 0;
1043
1044 line_ip = -1;
1045
1046 /*
1047 * Strip leading spaces:
1048 */
1049 tmp = line;
1050 while (*tmp) {
1051 if (*tmp != ' ')
1052 break;
1053 tmp++;
1054 }
1055
1056 if (*tmp) {
1057 /*
1058 * Parse hexa addresses followed by ':'
1059 */
1060 line_ip = strtoull(tmp, &tmp2, 16);
1061 if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
1062 line_ip = -1;
1063 }
1064
1065 if (line_ip != -1) {
1066 u64 start = map__rip_2objdump(self->ms.map, sym->start),
1067 end = map__rip_2objdump(self->ms.map, sym->end);
1068
1069 offset = line_ip - start;
1070 if (offset < 0 || (u64)line_ip > end)
1071 offset = -1;
1072 }
1073
1074 objdump_line = objdump_line__new(offset, line, privsize);
1075 if (objdump_line == NULL) {
1076 free(line);
1077 return -1;
1078 }
1079 objdump__add_line(head, objdump_line);
1080
1081 return 0;
1082} 968}
1083 969
1084int hist_entry__annotate(struct hist_entry *self, struct list_head *head, 970int hist_entry__annotate(struct hist_entry *he, size_t privsize)
1085 size_t privsize)
1086{ 971{
1087 struct symbol *sym = self->ms.sym; 972 return symbol__annotate(he->ms.sym, he->ms.map, privsize);
1088 struct map *map = self->ms.map;
1089 struct dso *dso = map->dso;
1090 char *filename = dso__build_id_filename(dso, NULL, 0);
1091 bool free_filename = true;
1092 char command[PATH_MAX * 2];
1093 FILE *file;
1094 int err = 0;
1095 u64 len;
1096 char symfs_filename[PATH_MAX];
1097
1098 if (filename) {
1099 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
1100 symbol_conf.symfs, filename);
1101 }
1102
1103 if (filename == NULL) {
1104 if (dso->has_build_id) {
1105 pr_err("Can't annotate %s: not enough memory\n",
1106 sym->name);
1107 return -ENOMEM;
1108 }
1109 goto fallback;
1110 } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
1111 strstr(command, "[kernel.kallsyms]") ||
1112 access(symfs_filename, R_OK)) {
1113 free(filename);
1114fallback:
1115 /*
1116 * If we don't have build-ids or the build-id file isn't in the
1117 * cache, or is just a kallsyms file, well, lets hope that this
1118 * DSO is the same as when 'perf record' ran.
1119 */
1120 filename = dso->long_name;
1121 snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
1122 symbol_conf.symfs, filename);
1123 free_filename = false;
1124 }
1125
1126 if (dso->origin == DSO__ORIG_KERNEL) {
1127 if (dso->annotate_warned)
1128 goto out_free_filename;
1129 err = -ENOENT;
1130 dso->annotate_warned = 1;
1131 pr_err("Can't annotate %s: No vmlinux file was found in the "
1132 "path\n", sym->name);
1133 goto out_free_filename;
1134 }
1135
1136 pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
1137 filename, sym->name, map->unmap_ip(map, sym->start),
1138 map->unmap_ip(map, sym->end));
1139
1140 len = sym->end - sym->start;
1141
1142 pr_debug("annotating [%p] %30s : [%p] %30s\n",
1143 dso, dso->long_name, sym, sym->name);
1144
1145 snprintf(command, sizeof(command),
1146 "objdump --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
1147 map__rip_2objdump(map, sym->start),
1148 map__rip_2objdump(map, sym->end),
1149 symfs_filename, filename);
1150
1151 pr_debug("Executing: %s\n", command);
1152
1153 file = popen(command, "r");
1154 if (!file)
1155 goto out_free_filename;
1156
1157 while (!feof(file))
1158 if (hist_entry__parse_objdump_line(self, file, head, privsize) < 0)
1159 break;
1160
1161 pclose(file);
1162out_free_filename:
1163 if (free_filename)
1164 free(filename);
1165 return err;
1166} 973}
1167 974
1168void hists__inc_nr_events(struct hists *self, u32 type) 975void hists__inc_nr_events(struct hists *self, u32 type)
@@ -1177,8 +984,12 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
1177 size_t ret = 0; 984 size_t ret = 0;
1178 985
1179 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { 986 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
1180 const char *name = event__get_event_name(i); 987 const char *name;
988
989 if (self->stats.nr_events[i] == 0)
990 continue;
1181 991
992 name = perf_event__name(i);
1182 if (!strcmp(name, "UNKNOWN")) 993 if (!strcmp(name, "UNKNOWN"))
1183 continue; 994 continue;
1184 995
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee789856a8c9..cb6858a2f9a3 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -9,33 +9,6 @@ extern struct callchain_param callchain_param;
9struct hist_entry; 9struct hist_entry;
10struct addr_location; 10struct addr_location;
11struct symbol; 11struct symbol;
12struct rb_root;
13
14struct objdump_line {
15 struct list_head node;
16 s64 offset;
17 char *line;
18};
19
20void objdump_line__free(struct objdump_line *self);
21struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
22 struct objdump_line *pos);
23
24struct sym_hist {
25 u64 sum;
26 u64 ip[0];
27};
28
29struct sym_ext {
30 struct rb_node node;
31 double percent;
32 char *path;
33};
34
35struct sym_priv {
36 struct sym_hist *hist;
37 struct sym_ext *ext;
38};
39 12
40/* 13/*
41 * The kernel collects the number of events it couldn't send in a stretch and 14 * The kernel collects the number of events it couldn't send in a stretch and
@@ -69,14 +42,13 @@ enum hist_column {
69}; 42};
70 43
71struct hists { 44struct hists {
72 struct rb_node rb_node;
73 struct rb_root entries; 45 struct rb_root entries;
74 u64 nr_entries; 46 u64 nr_entries;
75 struct events_stats stats; 47 struct events_stats stats;
76 u64 config;
77 u64 event_stream; 48 u64 event_stream;
78 u32 type;
79 u16 col_len[HISTC_NR_COLS]; 49 u16 col_len[HISTC_NR_COLS];
50 /* Best would be to reuse the session callchain cursor */
51 struct callchain_cursor callchain_cursor;
80}; 52};
81 53
82struct hist_entry *__hists__add_entry(struct hists *self, 54struct hist_entry *__hists__add_entry(struct hists *self,
@@ -102,9 +74,8 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp);
102size_t hists__fprintf(struct hists *self, struct hists *pair, 74size_t hists__fprintf(struct hists *self, struct hists *pair,
103 bool show_displacement, FILE *fp); 75 bool show_displacement, FILE *fp);
104 76
105int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip); 77int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr);
106int hist_entry__annotate(struct hist_entry *self, struct list_head *head, 78int hist_entry__annotate(struct hist_entry *self, size_t privsize);
107 size_t privsize);
108 79
109void hists__filter_by_dso(struct hists *self, const struct dso *dso); 80void hists__filter_by_dso(struct hists *self, const struct dso *dso);
110void hists__filter_by_thread(struct hists *self, const struct thread *thread); 81void hists__filter_by_thread(struct hists *self, const struct thread *thread);
@@ -113,21 +84,18 @@ u16 hists__col_len(struct hists *self, enum hist_column col);
113void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); 84void hists__set_col_len(struct hists *self, enum hist_column col, u16 len);
114bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); 85bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len);
115 86
116#ifdef NO_NEWT_SUPPORT 87struct perf_evlist;
117static inline int hists__browse(struct hists *self __used,
118 const char *helpline __used,
119 const char *ev_name __used)
120{
121 return 0;
122}
123 88
124static inline int hists__tui_browse_tree(struct rb_root *self __used, 89#ifdef NO_NEWT_SUPPORT
125 const char *help __used) 90static inline
91int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used,
92 const char *help __used)
126{ 93{
127 return 0; 94 return 0;
128} 95}
129 96
130static inline int hist_entry__tui_annotate(struct hist_entry *self __used) 97static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
98 int evidx __used)
131{ 99{
132 return 0; 100 return 0;
133} 101}
@@ -135,14 +103,12 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used)
135#define KEY_RIGHT -2 103#define KEY_RIGHT -2
136#else 104#else
137#include <newt.h> 105#include <newt.h>
138int hists__browse(struct hists *self, const char *helpline, 106int hist_entry__tui_annotate(struct hist_entry *self, int evidx);
139 const char *ev_name);
140int hist_entry__tui_annotate(struct hist_entry *self);
141 107
142#define KEY_LEFT NEWT_KEY_LEFT 108#define KEY_LEFT NEWT_KEY_LEFT
143#define KEY_RIGHT NEWT_KEY_RIGHT 109#define KEY_RIGHT NEWT_KEY_RIGHT
144 110
145int hists__tui_browse_tree(struct rb_root *self, const char *help); 111int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help);
146#endif 112#endif
147 113
148unsigned int hists__sort_list_width(struct hists *self); 114unsigned int hists__sort_list_width(struct hists *self);
diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h
index f5ca26e53fbb..356c7e467b83 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/perf/util/include/linux/list.h
@@ -1,3 +1,4 @@
1#include <linux/kernel.h>
1#include "../../../../include/linux/list.h" 2#include "../../../../include/linux/list.h"
2 3
3#ifndef PERF_LIST_H 4#ifndef PERF_LIST_H
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 135f69baf966..54a7e2634d58 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,6 +1,7 @@
1#include "../../../include/linux/hw_breakpoint.h" 1#include "../../../include/linux/hw_breakpoint.h"
2#include "util.h" 2#include "util.h"
3#include "../perf.h" 3#include "../perf.h"
4#include "evlist.h"
4#include "evsel.h" 5#include "evsel.h"
5#include "parse-options.h" 6#include "parse-options.h"
6#include "parse-events.h" 7#include "parse-events.h"
@@ -11,10 +12,6 @@
11#include "header.h" 12#include "header.h"
12#include "debugfs.h" 13#include "debugfs.h"
13 14
14int nr_counters;
15
16LIST_HEAD(evsel_list);
17
18struct event_symbol { 15struct event_symbol {
19 u8 type; 16 u8 type;
20 u64 config; 17 u64 config;
@@ -271,6 +268,9 @@ const char *event_name(struct perf_evsel *evsel)
271 u64 config = evsel->attr.config; 268 u64 config = evsel->attr.config;
272 int type = evsel->attr.type; 269 int type = evsel->attr.type;
273 270
271 if (evsel->name)
272 return evsel->name;
273
274 return __event_name(type, config); 274 return __event_name(type, config);
275} 275}
276 276
@@ -449,8 +449,8 @@ parse_single_tracepoint_event(char *sys_name,
449/* sys + ':' + event + ':' + flags*/ 449/* sys + ':' + event + ':' + flags*/
450#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128) 450#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
451static enum event_result 451static enum event_result
452parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp, 452parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
453 char *flags) 453 const char *evt_exp, char *flags)
454{ 454{
455 char evt_path[MAXPATHLEN]; 455 char evt_path[MAXPATHLEN];
456 struct dirent *evt_ent; 456 struct dirent *evt_ent;
@@ -483,15 +483,16 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
483 if (len < 0) 483 if (len < 0)
484 return EVT_FAILED; 484 return EVT_FAILED;
485 485
486 if (parse_events(NULL, event_opt, 0)) 486 if (parse_events(opt, event_opt, 0))
487 return EVT_FAILED; 487 return EVT_FAILED;
488 } 488 }
489 489
490 return EVT_HANDLED_ALL; 490 return EVT_HANDLED_ALL;
491} 491}
492 492
493static enum event_result parse_tracepoint_event(const char **strp, 493static enum event_result
494 struct perf_event_attr *attr) 494parse_tracepoint_event(const struct option *opt, const char **strp,
495 struct perf_event_attr *attr)
495{ 496{
496 const char *evt_name; 497 const char *evt_name;
497 char *flags = NULL, *comma_loc; 498 char *flags = NULL, *comma_loc;
@@ -530,7 +531,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
530 return EVT_FAILED; 531 return EVT_FAILED;
531 if (strpbrk(evt_name, "*?")) { 532 if (strpbrk(evt_name, "*?")) {
532 *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */ 533 *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */
533 return parse_multiple_tracepoint_event(sys_name, evt_name, 534 return parse_multiple_tracepoint_event(opt, sys_name, evt_name,
534 flags); 535 flags);
535 } else { 536 } else {
536 return parse_single_tracepoint_event(sys_name, evt_name, 537 return parse_single_tracepoint_event(sys_name, evt_name,
@@ -740,11 +741,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
740 * Symbolic names are (almost) exactly matched. 741 * Symbolic names are (almost) exactly matched.
741 */ 742 */
742static enum event_result 743static enum event_result
743parse_event_symbols(const char **str, struct perf_event_attr *attr) 744parse_event_symbols(const struct option *opt, const char **str,
745 struct perf_event_attr *attr)
744{ 746{
745 enum event_result ret; 747 enum event_result ret;
746 748
747 ret = parse_tracepoint_event(str, attr); 749 ret = parse_tracepoint_event(opt, str, attr);
748 if (ret != EVT_FAILED) 750 if (ret != EVT_FAILED)
749 goto modifier; 751 goto modifier;
750 752
@@ -778,14 +780,17 @@ modifier:
778 return ret; 780 return ret;
779} 781}
780 782
781int parse_events(const struct option *opt __used, const char *str, int unset __used) 783int parse_events(const struct option *opt, const char *str, int unset __used)
782{ 784{
785 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
783 struct perf_event_attr attr; 786 struct perf_event_attr attr;
784 enum event_result ret; 787 enum event_result ret;
788 const char *ostr;
785 789
786 for (;;) { 790 for (;;) {
791 ostr = str;
787 memset(&attr, 0, sizeof(attr)); 792 memset(&attr, 0, sizeof(attr));
788 ret = parse_event_symbols(&str, &attr); 793 ret = parse_event_symbols(opt, &str, &attr);
789 if (ret == EVT_FAILED) 794 if (ret == EVT_FAILED)
790 return -1; 795 return -1;
791 796
@@ -794,12 +799,15 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
794 799
795 if (ret != EVT_HANDLED_ALL) { 800 if (ret != EVT_HANDLED_ALL) {
796 struct perf_evsel *evsel; 801 struct perf_evsel *evsel;
797 evsel = perf_evsel__new(&attr, 802 evsel = perf_evsel__new(&attr, evlist->nr_entries);
798 nr_counters);
799 if (evsel == NULL) 803 if (evsel == NULL)
800 return -1; 804 return -1;
801 list_add_tail(&evsel->node, &evsel_list); 805 perf_evlist__add(evlist, evsel);
802 ++nr_counters; 806
807 evsel->name = calloc(str - ostr + 1, 1);
808 if (!evsel->name)
809 return -1;
810 strncpy(evsel->name, ostr, str - ostr);
803 } 811 }
804 812
805 if (*str == 0) 813 if (*str == 0)
@@ -813,13 +821,14 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
813 return 0; 821 return 0;
814} 822}
815 823
816int parse_filter(const struct option *opt __used, const char *str, 824int parse_filter(const struct option *opt, const char *str,
817 int unset __used) 825 int unset __used)
818{ 826{
827 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
819 struct perf_evsel *last = NULL; 828 struct perf_evsel *last = NULL;
820 829
821 if (!list_empty(&evsel_list)) 830 if (evlist->nr_entries > 0)
822 last = list_entry(evsel_list.prev, struct perf_evsel, node); 831 last = list_entry(evlist->entries.prev, struct perf_evsel, node);
823 832
824 if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { 833 if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
825 fprintf(stderr, 834 fprintf(stderr,
@@ -849,7 +858,7 @@ static const char * const event_type_descriptors[] = {
849 * Print the events from <debugfs_mount_point>/tracing/events 858 * Print the events from <debugfs_mount_point>/tracing/events
850 */ 859 */
851 860
852static void print_tracepoint_events(void) 861void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
853{ 862{
854 DIR *sys_dir, *evt_dir; 863 DIR *sys_dir, *evt_dir;
855 struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; 864 struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
@@ -864,6 +873,9 @@ static void print_tracepoint_events(void)
864 return; 873 return;
865 874
866 for_each_subsystem(sys_dir, sys_dirent, sys_next) { 875 for_each_subsystem(sys_dir, sys_dirent, sys_next) {
876 if (subsys_glob != NULL &&
877 !strglobmatch(sys_dirent.d_name, subsys_glob))
878 continue;
867 879
868 snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path, 880 snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
869 sys_dirent.d_name); 881 sys_dirent.d_name);
@@ -872,6 +884,10 @@ static void print_tracepoint_events(void)
872 continue; 884 continue;
873 885
874 for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { 886 for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
887 if (event_glob != NULL &&
888 !strglobmatch(evt_dirent.d_name, event_glob))
889 continue;
890
875 snprintf(evt_path, MAXPATHLEN, "%s:%s", 891 snprintf(evt_path, MAXPATHLEN, "%s:%s",
876 sys_dirent.d_name, evt_dirent.d_name); 892 sys_dirent.d_name, evt_dirent.d_name);
877 printf(" %-42s [%s]\n", evt_path, 893 printf(" %-42s [%s]\n", evt_path,
@@ -923,13 +939,61 @@ int is_valid_tracepoint(const char *event_string)
923 return 0; 939 return 0;
924} 940}
925 941
942void print_events_type(u8 type)
943{
944 struct event_symbol *syms = event_symbols;
945 unsigned int i;
946 char name[64];
947
948 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
949 if (type != syms->type)
950 continue;
951
952 if (strlen(syms->alias))
953 snprintf(name, sizeof(name), "%s OR %s",
954 syms->symbol, syms->alias);
955 else
956 snprintf(name, sizeof(name), "%s", syms->symbol);
957
958 printf(" %-42s [%s]\n", name,
959 event_type_descriptors[type]);
960 }
961}
962
963int print_hwcache_events(const char *event_glob)
964{
965 unsigned int type, op, i, printed = 0;
966
967 for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
968 for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
969 /* skip invalid cache type */
970 if (!is_cache_op_valid(type, op))
971 continue;
972
973 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
974 char *name = event_cache_name(type, op, i);
975
976 if (event_glob != NULL &&
977 !strglobmatch(name, event_glob))
978 continue;
979
980 printf(" %-42s [%s]\n", name,
981 event_type_descriptors[PERF_TYPE_HW_CACHE]);
982 ++printed;
983 }
984 }
985 }
986
987 return printed;
988}
989
926/* 990/*
927 * Print the help text for the event symbols: 991 * Print the help text for the event symbols:
928 */ 992 */
929void print_events(void) 993void print_events(const char *event_glob)
930{ 994{
931 struct event_symbol *syms = event_symbols; 995 struct event_symbol *syms = event_symbols;
932 unsigned int i, type, op, prev_type = -1; 996 unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0;
933 char name[40]; 997 char name[40];
934 998
935 printf("\n"); 999 printf("\n");
@@ -938,8 +1002,16 @@ void print_events(void)
938 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { 1002 for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
939 type = syms->type; 1003 type = syms->type;
940 1004
941 if (type != prev_type) 1005 if (type != prev_type && printed) {
942 printf("\n"); 1006 printf("\n");
1007 printed = 0;
1008 ntypes_printed++;
1009 }
1010
1011 if (event_glob != NULL &&
1012 !(strglobmatch(syms->symbol, event_glob) ||
1013 (syms->alias && strglobmatch(syms->alias, event_glob))))
1014 continue;
943 1015
944 if (strlen(syms->alias)) 1016 if (strlen(syms->alias))
945 sprintf(name, "%s OR %s", syms->symbol, syms->alias); 1017 sprintf(name, "%s OR %s", syms->symbol, syms->alias);
@@ -949,22 +1021,17 @@ void print_events(void)
949 event_type_descriptors[type]); 1021 event_type_descriptors[type]);
950 1022
951 prev_type = type; 1023 prev_type = type;
1024 ++printed;
952 } 1025 }
953 1026
954 printf("\n"); 1027 if (ntypes_printed) {
955 for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { 1028 printed = 0;
956 for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { 1029 printf("\n");
957 /* skip invalid cache type */
958 if (!is_cache_op_valid(type, op))
959 continue;
960
961 for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
962 printf(" %-42s [%s]\n",
963 event_cache_name(type, op, i),
964 event_type_descriptors[PERF_TYPE_HW_CACHE]);
965 }
966 }
967 } 1030 }
1031 print_hwcache_events(event_glob);
1032
1033 if (event_glob != NULL)
1034 return;
968 1035
969 printf("\n"); 1036 printf("\n");
970 printf(" %-42s [%s]\n", 1037 printf(" %-42s [%s]\n",
@@ -977,37 +1044,7 @@ void print_events(void)
977 event_type_descriptors[PERF_TYPE_BREAKPOINT]); 1044 event_type_descriptors[PERF_TYPE_BREAKPOINT]);
978 printf("\n"); 1045 printf("\n");
979 1046
980 print_tracepoint_events(); 1047 print_tracepoint_events(NULL, NULL);
981 1048
982 exit(129); 1049 exit(129);
983} 1050}
984
985int perf_evsel_list__create_default(void)
986{
987 struct perf_evsel *evsel;
988 struct perf_event_attr attr;
989
990 memset(&attr, 0, sizeof(attr));
991 attr.type = PERF_TYPE_HARDWARE;
992 attr.config = PERF_COUNT_HW_CPU_CYCLES;
993
994 evsel = perf_evsel__new(&attr, 0);
995
996 if (evsel == NULL)
997 return -ENOMEM;
998
999 list_add(&evsel->node, &evsel_list);
1000 ++nr_counters;
1001 return 0;
1002}
1003
1004void perf_evsel_list__delete(void)
1005{
1006 struct perf_evsel *pos, *n;
1007
1008 list_for_each_entry_safe(pos, n, &evsel_list, node) {
1009 list_del_init(&pos->node);
1010 perf_evsel__delete(pos);
1011 }
1012 nr_counters = 0;
1013}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 458e3ecf17af..212f88e07a9c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -9,11 +9,6 @@
9struct list_head; 9struct list_head;
10struct perf_evsel; 10struct perf_evsel;
11 11
12extern struct list_head evsel_list;
13
14int perf_evsel_list__create_default(void);
15void perf_evsel_list__delete(void);
16
17struct option; 12struct option;
18 13
19struct tracepoint_path { 14struct tracepoint_path {
@@ -25,8 +20,6 @@ struct tracepoint_path {
25extern struct tracepoint_path *tracepoint_id_to_path(u64 config); 20extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
26extern bool have_tracepoints(struct list_head *evlist); 21extern bool have_tracepoints(struct list_head *evlist);
27 22
28extern int nr_counters;
29
30const char *event_name(struct perf_evsel *event); 23const char *event_name(struct perf_evsel *event);
31extern const char *__event_name(int type, u64 config); 24extern const char *__event_name(int type, u64 config);
32 25
@@ -35,7 +28,10 @@ extern int parse_filter(const struct option *opt, const char *str, int unset);
35 28
36#define EVENTS_HELP_MAX (128*1024) 29#define EVENTS_HELP_MAX (128*1024)
37 30
38extern void print_events(void); 31void print_events(const char *event_glob);
32void print_events_type(u8 type);
33void print_tracepoint_events(const char *subsys_glob, const char *event_glob);
34int print_hwcache_events(const char *event_glob);
39extern int is_valid_tracepoint(const char *event_string); 35extern int is_valid_tracepoint(const char *event_string);
40 36
41extern char debugfs_path[]; 37extern char debugfs_path[];
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 6e29d9c9dccc..5ddee66020a7 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -31,6 +31,7 @@
31#include <string.h> 31#include <string.h>
32#include <stdarg.h> 32#include <stdarg.h>
33#include <limits.h> 33#include <limits.h>
34#include <elf.h>
34 35
35#undef _GNU_SOURCE 36#undef _GNU_SOURCE
36#include "util.h" 37#include "util.h"
@@ -111,7 +112,25 @@ static struct symbol *__find_kernel_function_by_name(const char *name,
111 NULL); 112 NULL);
112} 113}
113 114
114const char *kernel_get_module_path(const char *module) 115static struct map *kernel_get_module_map(const char *module)
116{
117 struct rb_node *nd;
118 struct map_groups *grp = &machine.kmaps;
119
120 if (!module)
121 module = "kernel";
122
123 for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
124 struct map *pos = rb_entry(nd, struct map, rb_node);
125 if (strncmp(pos->dso->short_name + 1, module,
126 pos->dso->short_name_len - 2) == 0) {
127 return pos;
128 }
129 }
130 return NULL;
131}
132
133static struct dso *kernel_get_module_dso(const char *module)
115{ 134{
116 struct dso *dso; 135 struct dso *dso;
117 struct map *map; 136 struct map *map;
@@ -141,7 +160,13 @@ const char *kernel_get_module_path(const char *module)
141 } 160 }
142 } 161 }
143found: 162found:
144 return dso->long_name; 163 return dso;
164}
165
166const char *kernel_get_module_path(const char *module)
167{
168 struct dso *dso = kernel_get_module_dso(module);
169 return (dso) ? dso->long_name : NULL;
145} 170}
146 171
147#ifdef DWARF_SUPPORT 172#ifdef DWARF_SUPPORT
@@ -384,7 +409,7 @@ int show_line_range(struct line_range *lr, const char *module)
384 setup_pager(); 409 setup_pager();
385 410
386 if (lr->function) 411 if (lr->function)
387 fprintf(stdout, "<%s:%d>\n", lr->function, 412 fprintf(stdout, "<%s@%s:%d>\n", lr->function, lr->path,
388 lr->start - lr->offset); 413 lr->start - lr->offset);
389 else 414 else
390 fprintf(stdout, "<%s:%d>\n", lr->path, lr->start); 415 fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
@@ -426,12 +451,14 @@ end:
426} 451}
427 452
428static int show_available_vars_at(int fd, struct perf_probe_event *pev, 453static int show_available_vars_at(int fd, struct perf_probe_event *pev,
429 int max_vls, bool externs) 454 int max_vls, struct strfilter *_filter,
455 bool externs)
430{ 456{
431 char *buf; 457 char *buf;
432 int ret, i; 458 int ret, i, nvars;
433 struct str_node *node; 459 struct str_node *node;
434 struct variable_list *vls = NULL, *vl; 460 struct variable_list *vls = NULL, *vl;
461 const char *var;
435 462
436 buf = synthesize_perf_probe_point(&pev->point); 463 buf = synthesize_perf_probe_point(&pev->point);
437 if (!buf) 464 if (!buf)
@@ -439,36 +466,45 @@ static int show_available_vars_at(int fd, struct perf_probe_event *pev,
439 pr_debug("Searching variables at %s\n", buf); 466 pr_debug("Searching variables at %s\n", buf);
440 467
441 ret = find_available_vars_at(fd, pev, &vls, max_vls, externs); 468 ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
442 if (ret > 0) { 469 if (ret <= 0) {
443 /* Some variables were found */ 470 pr_err("Failed to find variables at %s (%d)\n", buf, ret);
444 fprintf(stdout, "Available variables at %s\n", buf); 471 goto end;
445 for (i = 0; i < ret; i++) { 472 }
446 vl = &vls[i]; 473 /* Some variables are found */
447 /* 474 fprintf(stdout, "Available variables at %s\n", buf);
448 * A probe point might be converted to 475 for (i = 0; i < ret; i++) {
449 * several trace points. 476 vl = &vls[i];
450 */ 477 /*
451 fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol, 478 * A probe point might be converted to
452 vl->point.offset); 479 * several trace points.
453 free(vl->point.symbol); 480 */
454 if (vl->vars) { 481 fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
455 strlist__for_each(node, vl->vars) 482 vl->point.offset);
483 free(vl->point.symbol);
484 nvars = 0;
485 if (vl->vars) {
486 strlist__for_each(node, vl->vars) {
487 var = strchr(node->s, '\t') + 1;
488 if (strfilter__compare(_filter, var)) {
456 fprintf(stdout, "\t\t%s\n", node->s); 489 fprintf(stdout, "\t\t%s\n", node->s);
457 strlist__delete(vl->vars); 490 nvars++;
458 } else 491 }
459 fprintf(stdout, "(No variables)\n"); 492 }
493 strlist__delete(vl->vars);
460 } 494 }
461 free(vls); 495 if (nvars == 0)
462 } else 496 fprintf(stdout, "\t\t(No matched variables)\n");
463 pr_err("Failed to find variables at %s (%d)\n", buf, ret); 497 }
464 498 free(vls);
499end:
465 free(buf); 500 free(buf);
466 return ret; 501 return ret;
467} 502}
468 503
469/* Show available variables on given probe point */ 504/* Show available variables on given probe point */
470int show_available_vars(struct perf_probe_event *pevs, int npevs, 505int show_available_vars(struct perf_probe_event *pevs, int npevs,
471 int max_vls, const char *module, bool externs) 506 int max_vls, const char *module,
507 struct strfilter *_filter, bool externs)
472{ 508{
473 int i, fd, ret = 0; 509 int i, fd, ret = 0;
474 510
@@ -485,7 +521,8 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
485 setup_pager(); 521 setup_pager();
486 522
487 for (i = 0; i < npevs && ret >= 0; i++) 523 for (i = 0; i < npevs && ret >= 0; i++)
488 ret = show_available_vars_at(fd, &pevs[i], max_vls, externs); 524 ret = show_available_vars_at(fd, &pevs[i], max_vls, _filter,
525 externs);
489 526
490 close(fd); 527 close(fd);
491 return ret; 528 return ret;
@@ -531,7 +568,9 @@ int show_line_range(struct line_range *lr __unused, const char *module __unused)
531 568
532int show_available_vars(struct perf_probe_event *pevs __unused, 569int show_available_vars(struct perf_probe_event *pevs __unused,
533 int npevs __unused, int max_vls __unused, 570 int npevs __unused, int max_vls __unused,
534 const char *module __unused, bool externs __unused) 571 const char *module __unused,
572 struct strfilter *filter __unused,
573 bool externs __unused)
535{ 574{
536 pr_warning("Debuginfo-analysis is not supported.\n"); 575 pr_warning("Debuginfo-analysis is not supported.\n");
537 return -ENOSYS; 576 return -ENOSYS;
@@ -556,11 +595,11 @@ static int parse_line_num(char **ptr, int *val, const char *what)
556 * The line range syntax is described by: 595 * The line range syntax is described by:
557 * 596 *
558 * SRC[:SLN[+NUM|-ELN]] 597 * SRC[:SLN[+NUM|-ELN]]
559 * FNC[:SLN[+NUM|-ELN]] 598 * FNC[@SRC][:SLN[+NUM|-ELN]]
560 */ 599 */
561int parse_line_range_desc(const char *arg, struct line_range *lr) 600int parse_line_range_desc(const char *arg, struct line_range *lr)
562{ 601{
563 char *range, *name = strdup(arg); 602 char *range, *file, *name = strdup(arg);
564 int err; 603 int err;
565 604
566 if (!name) 605 if (!name)
@@ -610,7 +649,16 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
610 } 649 }
611 } 650 }
612 651
613 if (strchr(name, '.')) 652 file = strchr(name, '@');
653 if (file) {
654 *file = '\0';
655 lr->file = strdup(++file);
656 if (lr->file == NULL) {
657 err = -ENOMEM;
658 goto err;
659 }
660 lr->function = name;
661 } else if (strchr(name, '.'))
614 lr->file = name; 662 lr->file = name;
615 else 663 else
616 lr->function = name; 664 lr->function = name;
@@ -1784,9 +1832,12 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1784 } 1832 }
1785 1833
1786 /* Loop 2: add all events */ 1834 /* Loop 2: add all events */
1787 for (i = 0; i < npevs && ret >= 0; i++) 1835 for (i = 0; i < npevs; i++) {
1788 ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs, 1836 ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs,
1789 pkgs[i].ntevs, force_add); 1837 pkgs[i].ntevs, force_add);
1838 if (ret < 0)
1839 break;
1840 }
1790end: 1841end:
1791 /* Loop 3: cleanup and free trace events */ 1842 /* Loop 3: cleanup and free trace events */
1792 for (i = 0; i < npevs; i++) { 1843 for (i = 0; i < npevs; i++) {
@@ -1912,4 +1963,46 @@ int del_perf_probe_events(struct strlist *dellist)
1912 1963
1913 return ret; 1964 return ret;
1914} 1965}
1966/* TODO: don't use a global variable for filter ... */
1967static struct strfilter *available_func_filter;
1968
1969/*
1970 * If a symbol corresponds to a function with global binding and
1971 * matches filter return 0. For all others return 1.
1972 */
1973static int filter_available_functions(struct map *map __unused,
1974 struct symbol *sym)
1975{
1976 if (sym->binding == STB_GLOBAL &&
1977 strfilter__compare(available_func_filter, sym->name))
1978 return 0;
1979 return 1;
1980}
1981
1982int show_available_funcs(const char *module, struct strfilter *_filter)
1983{
1984 struct map *map;
1985 int ret;
1986
1987 setup_pager();
1988
1989 ret = init_vmlinux();
1990 if (ret < 0)
1991 return ret;
1915 1992
1993 map = kernel_get_module_map(module);
1994 if (!map) {
1995 pr_err("Failed to find %s map.\n", (module) ? : "kernel");
1996 return -EINVAL;
1997 }
1998 available_func_filter = _filter;
1999 if (map__load(map, filter_available_functions)) {
2000 pr_err("Failed to load map.\n");
2001 return -EINVAL;
2002 }
2003 if (!dso__sorted_by_name(map->dso, map->type))
2004 dso__sort_by_name(map->dso, map->type);
2005
2006 dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
2007 return 0;
2008}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5accbedfea37..3434fc9d79d5 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -3,6 +3,7 @@
3 3
4#include <stdbool.h> 4#include <stdbool.h>
5#include "strlist.h" 5#include "strlist.h"
6#include "strfilter.h"
6 7
7extern bool probe_event_dry_run; 8extern bool probe_event_dry_run;
8 9
@@ -126,7 +127,8 @@ extern int show_perf_probe_events(void);
126extern int show_line_range(struct line_range *lr, const char *module); 127extern int show_line_range(struct line_range *lr, const char *module);
127extern int show_available_vars(struct perf_probe_event *pevs, int npevs, 128extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
128 int max_probe_points, const char *module, 129 int max_probe_points, const char *module,
129 bool externs); 130 struct strfilter *filter, bool externs);
131extern int show_available_funcs(const char *module, struct strfilter *filter);
130 132
131 133
132/* Maximum index number of event-name postfix */ 134/* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index ab83b6ac5d65..194f9e2a3285 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -33,6 +33,7 @@
33#include <ctype.h> 33#include <ctype.h>
34#include <dwarf-regs.h> 34#include <dwarf-regs.h>
35 35
36#include <linux/bitops.h>
36#include "event.h" 37#include "event.h"
37#include "debug.h" 38#include "debug.h"
38#include "util.h" 39#include "util.h"
@@ -280,6 +281,19 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
280 return name ? (strcmp(tname, name) == 0) : false; 281 return name ? (strcmp(tname, name) == 0) : false;
281} 282}
282 283
284/* Get callsite line number of inline-function instance */
285static int die_get_call_lineno(Dwarf_Die *in_die)
286{
287 Dwarf_Attribute attr;
288 Dwarf_Word ret;
289
290 if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
291 return -ENOENT;
292
293 dwarf_formudata(&attr, &ret);
294 return (int)ret;
295}
296
283/* Get type die */ 297/* Get type die */
284static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) 298static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
285{ 299{
@@ -320,13 +334,23 @@ static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
320 return vr_die; 334 return vr_die;
321} 335}
322 336
323static bool die_is_signed_type(Dwarf_Die *tp_die) 337static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
338 Dwarf_Word *result)
324{ 339{
325 Dwarf_Attribute attr; 340 Dwarf_Attribute attr;
341
342 if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
343 dwarf_formudata(&attr, result) != 0)
344 return -ENOENT;
345
346 return 0;
347}
348
349static bool die_is_signed_type(Dwarf_Die *tp_die)
350{
326 Dwarf_Word ret; 351 Dwarf_Word ret;
327 352
328 if (dwarf_attr(tp_die, DW_AT_encoding, &attr) == NULL || 353 if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
329 dwarf_formudata(&attr, &ret) != 0)
330 return false; 354 return false;
331 355
332 return (ret == DW_ATE_signed_char || ret == DW_ATE_signed || 356 return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
@@ -335,11 +359,29 @@ static bool die_is_signed_type(Dwarf_Die *tp_die)
335 359
336static int die_get_byte_size(Dwarf_Die *tp_die) 360static int die_get_byte_size(Dwarf_Die *tp_die)
337{ 361{
338 Dwarf_Attribute attr;
339 Dwarf_Word ret; 362 Dwarf_Word ret;
340 363
341 if (dwarf_attr(tp_die, DW_AT_byte_size, &attr) == NULL || 364 if (die_get_attr_udata(tp_die, DW_AT_byte_size, &ret))
342 dwarf_formudata(&attr, &ret) != 0) 365 return 0;
366
367 return (int)ret;
368}
369
370static int die_get_bit_size(Dwarf_Die *tp_die)
371{
372 Dwarf_Word ret;
373
374 if (die_get_attr_udata(tp_die, DW_AT_bit_size, &ret))
375 return 0;
376
377 return (int)ret;
378}
379
380static int die_get_bit_offset(Dwarf_Die *tp_die)
381{
382 Dwarf_Word ret;
383
384 if (die_get_attr_udata(tp_die, DW_AT_bit_offset, &ret))
343 return 0; 385 return 0;
344 386
345 return (int)ret; 387 return (int)ret;
@@ -458,6 +500,151 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
458 return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem); 500 return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
459} 501}
460 502
503/* Walker on lines (Note: line number will not be sorted) */
504typedef int (* line_walk_handler_t) (const char *fname, int lineno,
505 Dwarf_Addr addr, void *data);
506
507struct __line_walk_param {
508 const char *fname;
509 line_walk_handler_t handler;
510 void *data;
511 int retval;
512};
513
514static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
515{
516 struct __line_walk_param *lw = data;
517 Dwarf_Addr addr;
518 int lineno;
519
520 if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
521 lineno = die_get_call_lineno(in_die);
522 if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
523 lw->retval = lw->handler(lw->fname, lineno, addr,
524 lw->data);
525 if (lw->retval != 0)
526 return DIE_FIND_CB_FOUND;
527 }
528 }
529 return DIE_FIND_CB_SIBLING;
530}
531
532/* Walk on lines of blocks included in given DIE */
533static int __die_walk_funclines(Dwarf_Die *sp_die,
534 line_walk_handler_t handler, void *data)
535{
536 struct __line_walk_param lw = {
537 .handler = handler,
538 .data = data,
539 .retval = 0,
540 };
541 Dwarf_Die die_mem;
542 Dwarf_Addr addr;
543 int lineno;
544
545 /* Handle function declaration line */
546 lw.fname = dwarf_decl_file(sp_die);
547 if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
548 dwarf_entrypc(sp_die, &addr) == 0) {
549 lw.retval = handler(lw.fname, lineno, addr, data);
550 if (lw.retval != 0)
551 goto done;
552 }
553 die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
554done:
555 return lw.retval;
556}
557
558static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
559{
560 struct __line_walk_param *lw = data;
561
562 lw->retval = __die_walk_funclines(sp_die, lw->handler, lw->data);
563 if (lw->retval != 0)
564 return DWARF_CB_ABORT;
565
566 return DWARF_CB_OK;
567}
568
569/*
570 * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on
571 * the lines inside the subprogram, otherwise PDIE must be a CU DIE.
572 */
573static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler,
574 void *data)
575{
576 Dwarf_Lines *lines;
577 Dwarf_Line *line;
578 Dwarf_Addr addr;
579 const char *fname;
580 int lineno, ret = 0;
581 Dwarf_Die die_mem, *cu_die;
582 size_t nlines, i;
583
584 /* Get the CU die */
585 if (dwarf_tag(pdie) == DW_TAG_subprogram)
586 cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL);
587 else
588 cu_die = pdie;
589 if (!cu_die) {
590 pr_debug2("Failed to get CU from subprogram\n");
591 return -EINVAL;
592 }
593
594 /* Get lines list in the CU */
595 if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
596 pr_debug2("Failed to get source lines on this CU.\n");
597 return -ENOENT;
598 }
599 pr_debug2("Get %zd lines from this CU\n", nlines);
600
601 /* Walk on the lines on lines list */
602 for (i = 0; i < nlines; i++) {
603 line = dwarf_onesrcline(lines, i);
604 if (line == NULL ||
605 dwarf_lineno(line, &lineno) != 0 ||
606 dwarf_lineaddr(line, &addr) != 0) {
607 pr_debug2("Failed to get line info. "
608 "Possible error in debuginfo.\n");
609 continue;
610 }
611 /* Filter lines based on address */
612 if (pdie != cu_die)
613 /*
614 * Address filtering
615 * The line is included in given function, and
616 * no inline block includes it.
617 */
618 if (!dwarf_haspc(pdie, addr) ||
619 die_find_inlinefunc(pdie, addr, &die_mem))
620 continue;
621 /* Get source line */
622 fname = dwarf_linesrc(line, NULL, NULL);
623
624 ret = handler(fname, lineno, addr, data);
625 if (ret != 0)
626 return ret;
627 }
628
629 /*
630 * Dwarf lines doesn't include function declarations and inlined
631 * subroutines. We have to check functions list or given function.
632 */
633 if (pdie != cu_die)
634 ret = __die_walk_funclines(pdie, handler, data);
635 else {
636 struct __line_walk_param param = {
637 .handler = handler,
638 .data = data,
639 .retval = 0,
640 };
641 dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
642 ret = param.retval;
643 }
644
645 return ret;
646}
647
461struct __find_variable_param { 648struct __find_variable_param {
462 const char *name; 649 const char *name;
463 Dwarf_Addr addr; 650 Dwarf_Addr addr;
@@ -669,6 +856,8 @@ static_var:
669 return 0; 856 return 0;
670} 857}
671 858
859#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long))
860
672static int convert_variable_type(Dwarf_Die *vr_die, 861static int convert_variable_type(Dwarf_Die *vr_die,
673 struct probe_trace_arg *tvar, 862 struct probe_trace_arg *tvar,
674 const char *cast) 863 const char *cast)
@@ -685,6 +874,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
685 return (tvar->type == NULL) ? -ENOMEM : 0; 874 return (tvar->type == NULL) ? -ENOMEM : 0;
686 } 875 }
687 876
877 if (die_get_bit_size(vr_die) != 0) {
878 /* This is a bitfield */
879 ret = snprintf(buf, 16, "b%d@%d/%zd", die_get_bit_size(vr_die),
880 die_get_bit_offset(vr_die),
881 BYTES_TO_BITS(die_get_byte_size(vr_die)));
882 goto formatted;
883 }
884
688 if (die_get_real_type(vr_die, &type) == NULL) { 885 if (die_get_real_type(vr_die, &type) == NULL) {
689 pr_warning("Failed to get a type information of %s.\n", 886 pr_warning("Failed to get a type information of %s.\n",
690 dwarf_diename(vr_die)); 887 dwarf_diename(vr_die));
@@ -729,29 +926,31 @@ static int convert_variable_type(Dwarf_Die *vr_die,
729 return (tvar->type == NULL) ? -ENOMEM : 0; 926 return (tvar->type == NULL) ? -ENOMEM : 0;
730 } 927 }
731 928
732 ret = die_get_byte_size(&type) * 8; 929 ret = BYTES_TO_BITS(die_get_byte_size(&type));
733 if (ret) { 930 if (!ret)
734 /* Check the bitwidth */ 931 /* No size ... try to use default type */
735 if (ret > MAX_BASIC_TYPE_BITS) { 932 return 0;
736 pr_info("%s exceeds max-bitwidth."
737 " Cut down to %d bits.\n",
738 dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
739 ret = MAX_BASIC_TYPE_BITS;
740 }
741 933
742 ret = snprintf(buf, 16, "%c%d", 934 /* Check the bitwidth */
743 die_is_signed_type(&type) ? 's' : 'u', ret); 935 if (ret > MAX_BASIC_TYPE_BITS) {
744 if (ret < 0 || ret >= 16) { 936 pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n",
745 if (ret >= 16) 937 dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
746 ret = -E2BIG; 938 ret = MAX_BASIC_TYPE_BITS;
747 pr_warning("Failed to convert variable type: %s\n", 939 }
748 strerror(-ret)); 940 ret = snprintf(buf, 16, "%c%d",
749 return ret; 941 die_is_signed_type(&type) ? 's' : 'u', ret);
750 } 942
751 tvar->type = strdup(buf); 943formatted:
752 if (tvar->type == NULL) 944 if (ret < 0 || ret >= 16) {
753 return -ENOMEM; 945 if (ret >= 16)
946 ret = -E2BIG;
947 pr_warning("Failed to convert variable type: %s\n",
948 strerror(-ret));
949 return ret;
754 } 950 }
951 tvar->type = strdup(buf);
952 if (tvar->type == NULL)
953 return -ENOMEM;
755 return 0; 954 return 0;
756} 955}
757 956
@@ -1050,157 +1249,102 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
1050 return ret; 1249 return ret;
1051} 1250}
1052 1251
1053/* Find probe point from its line number */ 1252static int probe_point_line_walker(const char *fname, int lineno,
1054static int find_probe_point_by_line(struct probe_finder *pf) 1253 Dwarf_Addr addr, void *data)
1055{ 1254{
1056 Dwarf_Lines *lines; 1255 struct probe_finder *pf = data;
1057 Dwarf_Line *line; 1256 int ret;
1058 size_t nlines, i;
1059 Dwarf_Addr addr;
1060 int lineno;
1061 int ret = 0;
1062
1063 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
1064 pr_warning("No source lines found.\n");
1065 return -ENOENT;
1066 }
1067 1257
1068 for (i = 0; i < nlines && ret == 0; i++) { 1258 if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
1069 line = dwarf_onesrcline(lines, i); 1259 return 0;
1070 if (dwarf_lineno(line, &lineno) != 0 ||
1071 lineno != pf->lno)
1072 continue;
1073 1260
1074 /* TODO: Get fileno from line, but how? */ 1261 pf->addr = addr;
1075 if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0) 1262 ret = call_probe_finder(NULL, pf);
1076 continue;
1077 1263
1078 if (dwarf_lineaddr(line, &addr) != 0) { 1264 /* Continue if no error, because the line will be in inline function */
1079 pr_warning("Failed to get the address of the line.\n"); 1265 return ret < 0 ? ret : 0;
1080 return -ENOENT; 1266}
1081 }
1082 pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n",
1083 (int)i, lineno, (uintmax_t)addr);
1084 pf->addr = addr;
1085 1267
1086 ret = call_probe_finder(NULL, pf); 1268/* Find probe point from its line number */
1087 /* Continuing, because target line might be inlined. */ 1269static int find_probe_point_by_line(struct probe_finder *pf)
1088 } 1270{
1089 return ret; 1271 return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
1090} 1272}
1091 1273
1092/* Find lines which match lazy pattern */ 1274/* Find lines which match lazy pattern */
1093static int find_lazy_match_lines(struct list_head *head, 1275static int find_lazy_match_lines(struct list_head *head,
1094 const char *fname, const char *pat) 1276 const char *fname, const char *pat)
1095{ 1277{
1096 char *fbuf, *p1, *p2; 1278 FILE *fp;
1097 int fd, line, nlines = -1; 1279 char *line = NULL;
1098 struct stat st; 1280 size_t line_len;
1099 1281 ssize_t len;
1100 fd = open(fname, O_RDONLY); 1282 int count = 0, linenum = 1;
1101 if (fd < 0) { 1283
1102 pr_warning("Failed to open %s: %s\n", fname, strerror(-fd)); 1284 fp = fopen(fname, "r");
1285 if (!fp) {
1286 pr_warning("Failed to open %s: %s\n", fname, strerror(errno));
1103 return -errno; 1287 return -errno;
1104 } 1288 }
1105 1289
1106 if (fstat(fd, &st) < 0) { 1290 while ((len = getline(&line, &line_len, fp)) > 0) {
1107 pr_warning("Failed to get the size of %s: %s\n", 1291
1108 fname, strerror(errno)); 1292 if (line[len - 1] == '\n')
1109 nlines = -errno; 1293 line[len - 1] = '\0';
1110 goto out_close; 1294
1111 } 1295 if (strlazymatch(line, pat)) {
1112 1296 line_list__add_line(head, linenum);
1113 nlines = -ENOMEM; 1297 count++;
1114 fbuf = malloc(st.st_size + 2);
1115 if (fbuf == NULL)
1116 goto out_close;
1117 if (read(fd, fbuf, st.st_size) < 0) {
1118 pr_warning("Failed to read %s: %s\n", fname, strerror(errno));
1119 nlines = -errno;
1120 goto out_free_fbuf;
1121 }
1122 fbuf[st.st_size] = '\n'; /* Dummy line */
1123 fbuf[st.st_size + 1] = '\0';
1124 p1 = fbuf;
1125 line = 1;
1126 nlines = 0;
1127 while ((p2 = strchr(p1, '\n')) != NULL) {
1128 *p2 = '\0';
1129 if (strlazymatch(p1, pat)) {
1130 line_list__add_line(head, line);
1131 nlines++;
1132 } 1298 }
1133 line++; 1299 linenum++;
1134 p1 = p2 + 1;
1135 } 1300 }
1136out_free_fbuf: 1301
1137 free(fbuf); 1302 if (ferror(fp))
1138out_close: 1303 count = -errno;
1139 close(fd); 1304 free(line);
1140 return nlines; 1305 fclose(fp);
1306
1307 if (count == 0)
1308 pr_debug("No matched lines found in %s.\n", fname);
1309 return count;
1310}
1311
1312static int probe_point_lazy_walker(const char *fname, int lineno,
1313 Dwarf_Addr addr, void *data)
1314{
1315 struct probe_finder *pf = data;
1316 int ret;
1317
1318 if (!line_list__has_line(&pf->lcache, lineno) ||
1319 strtailcmp(fname, pf->fname) != 0)
1320 return 0;
1321
1322 pr_debug("Probe line found: line:%d addr:0x%llx\n",
1323 lineno, (unsigned long long)addr);
1324 pf->addr = addr;
1325 ret = call_probe_finder(NULL, pf);
1326
1327 /*
1328 * Continue if no error, because the lazy pattern will match
1329 * to other lines
1330 */
1331 return ret < 0 ? ret : 0;
1141} 1332}
1142 1333
1143/* Find probe points from lazy pattern */ 1334/* Find probe points from lazy pattern */
1144static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) 1335static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
1145{ 1336{
1146 Dwarf_Lines *lines;
1147 Dwarf_Line *line;
1148 size_t nlines, i;
1149 Dwarf_Addr addr;
1150 Dwarf_Die die_mem;
1151 int lineno;
1152 int ret = 0; 1337 int ret = 0;
1153 1338
1154 if (list_empty(&pf->lcache)) { 1339 if (list_empty(&pf->lcache)) {
1155 /* Matching lazy line pattern */ 1340 /* Matching lazy line pattern */
1156 ret = find_lazy_match_lines(&pf->lcache, pf->fname, 1341 ret = find_lazy_match_lines(&pf->lcache, pf->fname,
1157 pf->pev->point.lazy_line); 1342 pf->pev->point.lazy_line);
1158 if (ret == 0) { 1343 if (ret <= 0)
1159 pr_debug("No matched lines found in %s.\n", pf->fname);
1160 return 0;
1161 } else if (ret < 0)
1162 return ret; 1344 return ret;
1163 } 1345 }
1164 1346
1165 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) { 1347 return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
1166 pr_warning("No source lines found.\n");
1167 return -ENOENT;
1168 }
1169
1170 for (i = 0; i < nlines && ret >= 0; i++) {
1171 line = dwarf_onesrcline(lines, i);
1172
1173 if (dwarf_lineno(line, &lineno) != 0 ||
1174 !line_list__has_line(&pf->lcache, lineno))
1175 continue;
1176
1177 /* TODO: Get fileno from line, but how? */
1178 if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
1179 continue;
1180
1181 if (dwarf_lineaddr(line, &addr) != 0) {
1182 pr_debug("Failed to get the address of line %d.\n",
1183 lineno);
1184 continue;
1185 }
1186 if (sp_die) {
1187 /* Address filtering 1: does sp_die include addr? */
1188 if (!dwarf_haspc(sp_die, addr))
1189 continue;
1190 /* Address filtering 2: No child include addr? */
1191 if (die_find_inlinefunc(sp_die, addr, &die_mem))
1192 continue;
1193 }
1194
1195 pr_debug("Probe line found: line[%d]:%d addr:0x%llx\n",
1196 (int)i, lineno, (unsigned long long)addr);
1197 pf->addr = addr;
1198
1199 ret = call_probe_finder(sp_die, pf);
1200 /* Continuing, because target line might be inlined. */
1201 }
1202 /* TODO: deallocate lines, but how? */
1203 return ret;
1204} 1348}
1205 1349
1206/* Callback parameter with return value */ 1350/* Callback parameter with return value */
@@ -1318,8 +1462,7 @@ static int find_probes(int fd, struct probe_finder *pf)
1318 off = 0; 1462 off = 0;
1319 line_list__init(&pf->lcache); 1463 line_list__init(&pf->lcache);
1320 /* Loop on CUs (Compilation Unit) */ 1464 /* Loop on CUs (Compilation Unit) */
1321 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) && 1465 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
1322 ret >= 0) {
1323 /* Get the DIE(Debugging Information Entry) of this CU */ 1466 /* Get the DIE(Debugging Information Entry) of this CU */
1324 diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die); 1467 diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);
1325 if (!diep) 1468 if (!diep)
@@ -1340,6 +1483,8 @@ static int find_probes(int fd, struct probe_finder *pf)
1340 pf->lno = pp->line; 1483 pf->lno = pp->line;
1341 ret = find_probe_point_by_line(pf); 1484 ret = find_probe_point_by_line(pf);
1342 } 1485 }
1486 if (ret < 0)
1487 break;
1343 } 1488 }
1344 off = noff; 1489 off = noff;
1345 } 1490 }
@@ -1644,91 +1789,28 @@ static int line_range_add_line(const char *src, unsigned int lineno,
1644 return line_list__add_line(&lr->line_list, lineno); 1789 return line_list__add_line(&lr->line_list, lineno);
1645} 1790}
1646 1791
1647/* Search function declaration lines */ 1792static int line_range_walk_cb(const char *fname, int lineno,
1648static int line_range_funcdecl_cb(Dwarf_Die *sp_die, void *data) 1793 Dwarf_Addr addr __used,
1794 void *data)
1649{ 1795{
1650 struct dwarf_callback_param *param = data; 1796 struct line_finder *lf = data;
1651 struct line_finder *lf = param->data;
1652 const char *src;
1653 int lineno;
1654 1797
1655 src = dwarf_decl_file(sp_die); 1798 if ((strtailcmp(fname, lf->fname) != 0) ||
1656 if (src && strtailcmp(src, lf->fname) != 0)
1657 return DWARF_CB_OK;
1658
1659 if (dwarf_decl_line(sp_die, &lineno) != 0 ||
1660 (lf->lno_s > lineno || lf->lno_e < lineno)) 1799 (lf->lno_s > lineno || lf->lno_e < lineno))
1661 return DWARF_CB_OK; 1800 return 0;
1662 1801
1663 param->retval = line_range_add_line(src, lineno, lf->lr); 1802 if (line_range_add_line(fname, lineno, lf->lr) < 0)
1664 if (param->retval < 0) 1803 return -EINVAL;
1665 return DWARF_CB_ABORT;
1666 return DWARF_CB_OK;
1667}
1668 1804
1669static int find_line_range_func_decl_lines(struct line_finder *lf) 1805 return 0;
1670{
1671 struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
1672 dwarf_getfuncs(&lf->cu_die, line_range_funcdecl_cb, &param, 0);
1673 return param.retval;
1674} 1806}
1675 1807
1676/* Find line range from its line number */ 1808/* Find line range from its line number */
1677static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) 1809static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
1678{ 1810{
1679 Dwarf_Lines *lines; 1811 int ret;
1680 Dwarf_Line *line;
1681 size_t nlines, i;
1682 Dwarf_Addr addr;
1683 int lineno, ret = 0;
1684 const char *src;
1685 Dwarf_Die die_mem;
1686
1687 line_list__init(&lf->lr->line_list);
1688 if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
1689 pr_warning("No source lines found.\n");
1690 return -ENOENT;
1691 }
1692
1693 /* Search probable lines on lines list */
1694 for (i = 0; i < nlines; i++) {
1695 line = dwarf_onesrcline(lines, i);
1696 if (dwarf_lineno(line, &lineno) != 0 ||
1697 (lf->lno_s > lineno || lf->lno_e < lineno))
1698 continue;
1699
1700 if (sp_die) {
1701 /* Address filtering 1: does sp_die include addr? */
1702 if (dwarf_lineaddr(line, &addr) != 0 ||
1703 !dwarf_haspc(sp_die, addr))
1704 continue;
1705
1706 /* Address filtering 2: No child include addr? */
1707 if (die_find_inlinefunc(sp_die, addr, &die_mem))
1708 continue;
1709 }
1710
1711 /* TODO: Get fileno from line, but how? */
1712 src = dwarf_linesrc(line, NULL, NULL);
1713 if (strtailcmp(src, lf->fname) != 0)
1714 continue;
1715
1716 ret = line_range_add_line(src, lineno, lf->lr);
1717 if (ret < 0)
1718 return ret;
1719 }
1720 1812
1721 /* 1813 ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf);
1722 * Dwarf lines doesn't include function declarations. We have to
1723 * check functions list or given function.
1724 */
1725 if (sp_die) {
1726 src = dwarf_decl_file(sp_die);
1727 if (src && dwarf_decl_line(sp_die, &lineno) == 0 &&
1728 (lf->lno_s <= lineno && lf->lno_e >= lineno))
1729 ret = line_range_add_line(src, lineno, lf->lr);
1730 } else
1731 ret = find_line_range_func_decl_lines(lf);
1732 1814
1733 /* Update status */ 1815 /* Update status */
1734 if (ret >= 0) 1816 if (ret >= 0)
@@ -1758,9 +1840,6 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
1758 struct line_finder *lf = param->data; 1840 struct line_finder *lf = param->data;
1759 struct line_range *lr = lf->lr; 1841 struct line_range *lr = lf->lr;
1760 1842
1761 pr_debug("find (%llx) %s\n",
1762 (unsigned long long)dwarf_dieoffset(sp_die),
1763 dwarf_diename(sp_die));
1764 if (dwarf_tag(sp_die) == DW_TAG_subprogram && 1843 if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
1765 die_compare_name(sp_die, lr->function)) { 1844 die_compare_name(sp_die, lr->function)) {
1766 lf->fname = dwarf_decl_file(sp_die); 1845 lf->fname = dwarf_decl_file(sp_die);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
new file mode 100644
index 000000000000..a9f2d7e1204d
--- /dev/null
+++ b/tools/perf/util/python.c
@@ -0,0 +1,896 @@
1#include <Python.h>
2#include <structmember.h>
3#include <inttypes.h>
4#include <poll.h>
5#include "evlist.h"
6#include "evsel.h"
7#include "event.h"
8#include "cpumap.h"
9#include "thread_map.h"
10
11/* Define PyVarObject_HEAD_INIT for python 2.5 */
12#ifndef PyVarObject_HEAD_INIT
13# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
14#endif
15
16struct throttle_event {
17 struct perf_event_header header;
18 u64 time;
19 u64 id;
20 u64 stream_id;
21};
22
23PyMODINIT_FUNC initperf(void);
24
25#define member_def(type, member, ptype, help) \
26 { #member, ptype, \
27 offsetof(struct pyrf_event, event) + offsetof(struct type, member), \
28 0, help }
29
30#define sample_member_def(name, member, ptype, help) \
31 { #name, ptype, \
32 offsetof(struct pyrf_event, sample) + offsetof(struct perf_sample, member), \
33 0, help }
34
35struct pyrf_event {
36 PyObject_HEAD
37 struct perf_sample sample;
38 union perf_event event;
39};
40
41#define sample_members \
42 sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \
43 sample_member_def(sample_pid, pid, T_INT, "event pid"), \
44 sample_member_def(sample_tid, tid, T_INT, "event tid"), \
45 sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
46 sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"), \
47 sample_member_def(sample_id, id, T_ULONGLONG, "event id"), \
48 sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \
49 sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \
50 sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
51
52static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
53
54static PyMemberDef pyrf_mmap_event__members[] = {
55 sample_members
56 member_def(perf_event_header, type, T_UINT, "event type"),
57 member_def(mmap_event, pid, T_UINT, "event pid"),
58 member_def(mmap_event, tid, T_UINT, "event tid"),
59 member_def(mmap_event, start, T_ULONGLONG, "start of the map"),
60 member_def(mmap_event, len, T_ULONGLONG, "map length"),
61 member_def(mmap_event, pgoff, T_ULONGLONG, "page offset"),
62 member_def(mmap_event, filename, T_STRING_INPLACE, "backing store"),
63 { .name = NULL, },
64};
65
66static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
67{
68 PyObject *ret;
69 char *s;
70
71 if (asprintf(&s, "{ type: mmap, pid: %u, tid: %u, start: %#" PRIx64 ", "
72 "length: %#" PRIx64 ", offset: %#" PRIx64 ", "
73 "filename: %s }",
74 pevent->event.mmap.pid, pevent->event.mmap.tid,
75 pevent->event.mmap.start, pevent->event.mmap.len,
76 pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) {
77 ret = PyErr_NoMemory();
78 } else {
79 ret = PyString_FromString(s);
80 free(s);
81 }
82 return ret;
83}
84
85static PyTypeObject pyrf_mmap_event__type = {
86 PyVarObject_HEAD_INIT(NULL, 0)
87 .tp_name = "perf.mmap_event",
88 .tp_basicsize = sizeof(struct pyrf_event),
89 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
90 .tp_doc = pyrf_mmap_event__doc,
91 .tp_members = pyrf_mmap_event__members,
92 .tp_repr = (reprfunc)pyrf_mmap_event__repr,
93};
94
95static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
96
97static PyMemberDef pyrf_task_event__members[] = {
98 sample_members
99 member_def(perf_event_header, type, T_UINT, "event type"),
100 member_def(fork_event, pid, T_UINT, "event pid"),
101 member_def(fork_event, ppid, T_UINT, "event ppid"),
102 member_def(fork_event, tid, T_UINT, "event tid"),
103 member_def(fork_event, ptid, T_UINT, "event ptid"),
104 member_def(fork_event, time, T_ULONGLONG, "timestamp"),
105 { .name = NULL, },
106};
107
108static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
109{
110 return PyString_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
111 "ptid: %u, time: %" PRIu64 "}",
112 pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit",
113 pevent->event.fork.pid,
114 pevent->event.fork.ppid,
115 pevent->event.fork.tid,
116 pevent->event.fork.ptid,
117 pevent->event.fork.time);
118}
119
120static PyTypeObject pyrf_task_event__type = {
121 PyVarObject_HEAD_INIT(NULL, 0)
122 .tp_name = "perf.task_event",
123 .tp_basicsize = sizeof(struct pyrf_event),
124 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
125 .tp_doc = pyrf_task_event__doc,
126 .tp_members = pyrf_task_event__members,
127 .tp_repr = (reprfunc)pyrf_task_event__repr,
128};
129
130static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
131
132static PyMemberDef pyrf_comm_event__members[] = {
133 sample_members
134 member_def(perf_event_header, type, T_UINT, "event type"),
135 member_def(comm_event, pid, T_UINT, "event pid"),
136 member_def(comm_event, tid, T_UINT, "event tid"),
137 member_def(comm_event, comm, T_STRING_INPLACE, "process name"),
138 { .name = NULL, },
139};
140
141static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
142{
143 return PyString_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
144 pevent->event.comm.pid,
145 pevent->event.comm.tid,
146 pevent->event.comm.comm);
147}
148
149static PyTypeObject pyrf_comm_event__type = {
150 PyVarObject_HEAD_INIT(NULL, 0)
151 .tp_name = "perf.comm_event",
152 .tp_basicsize = sizeof(struct pyrf_event),
153 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
154 .tp_doc = pyrf_comm_event__doc,
155 .tp_members = pyrf_comm_event__members,
156 .tp_repr = (reprfunc)pyrf_comm_event__repr,
157};
158
159static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
160
161static PyMemberDef pyrf_throttle_event__members[] = {
162 sample_members
163 member_def(perf_event_header, type, T_UINT, "event type"),
164 member_def(throttle_event, time, T_ULONGLONG, "timestamp"),
165 member_def(throttle_event, id, T_ULONGLONG, "event id"),
166 member_def(throttle_event, stream_id, T_ULONGLONG, "event stream id"),
167 { .name = NULL, },
168};
169
170static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
171{
172 struct throttle_event *te = (struct throttle_event *)(&pevent->event.header + 1);
173
174 return PyString_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64
175 ", stream_id: %" PRIu64 " }",
176 pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
177 te->time, te->id, te->stream_id);
178}
179
180static PyTypeObject pyrf_throttle_event__type = {
181 PyVarObject_HEAD_INIT(NULL, 0)
182 .tp_name = "perf.throttle_event",
183 .tp_basicsize = sizeof(struct pyrf_event),
184 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
185 .tp_doc = pyrf_throttle_event__doc,
186 .tp_members = pyrf_throttle_event__members,
187 .tp_repr = (reprfunc)pyrf_throttle_event__repr,
188};
189
190static int pyrf_event__setup_types(void)
191{
192 int err;
193 pyrf_mmap_event__type.tp_new =
194 pyrf_task_event__type.tp_new =
195 pyrf_comm_event__type.tp_new =
196 pyrf_throttle_event__type.tp_new = PyType_GenericNew;
197 err = PyType_Ready(&pyrf_mmap_event__type);
198 if (err < 0)
199 goto out;
200 err = PyType_Ready(&pyrf_task_event__type);
201 if (err < 0)
202 goto out;
203 err = PyType_Ready(&pyrf_comm_event__type);
204 if (err < 0)
205 goto out;
206 err = PyType_Ready(&pyrf_throttle_event__type);
207 if (err < 0)
208 goto out;
209out:
210 return err;
211}
212
213static PyTypeObject *pyrf_event__type[] = {
214 [PERF_RECORD_MMAP] = &pyrf_mmap_event__type,
215 [PERF_RECORD_LOST] = &pyrf_mmap_event__type,
216 [PERF_RECORD_COMM] = &pyrf_comm_event__type,
217 [PERF_RECORD_EXIT] = &pyrf_task_event__type,
218 [PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type,
219 [PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
220 [PERF_RECORD_FORK] = &pyrf_task_event__type,
221 [PERF_RECORD_READ] = &pyrf_mmap_event__type,
222 [PERF_RECORD_SAMPLE] = &pyrf_mmap_event__type,
223};
224
225static PyObject *pyrf_event__new(union perf_event *event)
226{
227 struct pyrf_event *pevent;
228 PyTypeObject *ptype;
229
230 if (event->header.type < PERF_RECORD_MMAP ||
231 event->header.type > PERF_RECORD_SAMPLE)
232 return NULL;
233
234 ptype = pyrf_event__type[event->header.type];
235 pevent = PyObject_New(struct pyrf_event, ptype);
236 if (pevent != NULL)
237 memcpy(&pevent->event, event, event->header.size);
238 return (PyObject *)pevent;
239}
240
241struct pyrf_cpu_map {
242 PyObject_HEAD
243
244 struct cpu_map *cpus;
245};
246
247static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
248 PyObject *args, PyObject *kwargs)
249{
250 static char *kwlist[] = { "cpustr", NULL, NULL, };
251 char *cpustr = NULL;
252
253 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
254 kwlist, &cpustr))
255 return -1;
256
257 pcpus->cpus = cpu_map__new(cpustr);
258 if (pcpus->cpus == NULL)
259 return -1;
260 return 0;
261}
262
263static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
264{
265 cpu_map__delete(pcpus->cpus);
266 pcpus->ob_type->tp_free((PyObject*)pcpus);
267}
268
269static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
270{
271 struct pyrf_cpu_map *pcpus = (void *)obj;
272
273 return pcpus->cpus->nr;
274}
275
276static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
277{
278 struct pyrf_cpu_map *pcpus = (void *)obj;
279
280 if (i >= pcpus->cpus->nr)
281 return NULL;
282
283 return Py_BuildValue("i", pcpus->cpus->map[i]);
284}
285
286static PySequenceMethods pyrf_cpu_map__sequence_methods = {
287 .sq_length = pyrf_cpu_map__length,
288 .sq_item = pyrf_cpu_map__item,
289};
290
291static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
292
293static PyTypeObject pyrf_cpu_map__type = {
294 PyVarObject_HEAD_INIT(NULL, 0)
295 .tp_name = "perf.cpu_map",
296 .tp_basicsize = sizeof(struct pyrf_cpu_map),
297 .tp_dealloc = (destructor)pyrf_cpu_map__delete,
298 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
299 .tp_doc = pyrf_cpu_map__doc,
300 .tp_as_sequence = &pyrf_cpu_map__sequence_methods,
301 .tp_init = (initproc)pyrf_cpu_map__init,
302};
303
304static int pyrf_cpu_map__setup_types(void)
305{
306 pyrf_cpu_map__type.tp_new = PyType_GenericNew;
307 return PyType_Ready(&pyrf_cpu_map__type);
308}
309
310struct pyrf_thread_map {
311 PyObject_HEAD
312
313 struct thread_map *threads;
314};
315
316static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
317 PyObject *args, PyObject *kwargs)
318{
319 static char *kwlist[] = { "pid", "tid", NULL, NULL, };
320 int pid = -1, tid = -1;
321
322 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
323 kwlist, &pid, &tid))
324 return -1;
325
326 pthreads->threads = thread_map__new(pid, tid);
327 if (pthreads->threads == NULL)
328 return -1;
329 return 0;
330}
331
332static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
333{
334 thread_map__delete(pthreads->threads);
335 pthreads->ob_type->tp_free((PyObject*)pthreads);
336}
337
338static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
339{
340 struct pyrf_thread_map *pthreads = (void *)obj;
341
342 return pthreads->threads->nr;
343}
344
345static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
346{
347 struct pyrf_thread_map *pthreads = (void *)obj;
348
349 if (i >= pthreads->threads->nr)
350 return NULL;
351
352 return Py_BuildValue("i", pthreads->threads->map[i]);
353}
354
355static PySequenceMethods pyrf_thread_map__sequence_methods = {
356 .sq_length = pyrf_thread_map__length,
357 .sq_item = pyrf_thread_map__item,
358};
359
360static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
361
362static PyTypeObject pyrf_thread_map__type = {
363 PyVarObject_HEAD_INIT(NULL, 0)
364 .tp_name = "perf.thread_map",
365 .tp_basicsize = sizeof(struct pyrf_thread_map),
366 .tp_dealloc = (destructor)pyrf_thread_map__delete,
367 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
368 .tp_doc = pyrf_thread_map__doc,
369 .tp_as_sequence = &pyrf_thread_map__sequence_methods,
370 .tp_init = (initproc)pyrf_thread_map__init,
371};
372
373static int pyrf_thread_map__setup_types(void)
374{
375 pyrf_thread_map__type.tp_new = PyType_GenericNew;
376 return PyType_Ready(&pyrf_thread_map__type);
377}
378
379struct pyrf_evsel {
380 PyObject_HEAD
381
382 struct perf_evsel evsel;
383};
384
385static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
386 PyObject *args, PyObject *kwargs)
387{
388 struct perf_event_attr attr = {
389 .type = PERF_TYPE_HARDWARE,
390 .config = PERF_COUNT_HW_CPU_CYCLES,
391 .sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID,
392 };
393 static char *kwlist[] = {
394 "type",
395 "config",
396 "sample_freq",
397 "sample_period",
398 "sample_type",
399 "read_format",
400 "disabled",
401 "inherit",
402 "pinned",
403 "exclusive",
404 "exclude_user",
405 "exclude_kernel",
406 "exclude_hv",
407 "exclude_idle",
408 "mmap",
409 "comm",
410 "freq",
411 "inherit_stat",
412 "enable_on_exec",
413 "task",
414 "watermark",
415 "precise_ip",
416 "mmap_data",
417 "sample_id_all",
418 "wakeup_events",
419 "bp_type",
420 "bp_addr",
421 "bp_len", NULL, NULL, };
422 u64 sample_period = 0;
423 u32 disabled = 0,
424 inherit = 0,
425 pinned = 0,
426 exclusive = 0,
427 exclude_user = 0,
428 exclude_kernel = 0,
429 exclude_hv = 0,
430 exclude_idle = 0,
431 mmap = 0,
432 comm = 0,
433 freq = 1,
434 inherit_stat = 0,
435 enable_on_exec = 0,
436 task = 0,
437 watermark = 0,
438 precise_ip = 0,
439 mmap_data = 0,
440 sample_id_all = 1;
441 int idx = 0;
442
443 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
444 "|iKiKKiiiiiiiiiiiiiiiiiiiiiKK", kwlist,
445 &attr.type, &attr.config, &attr.sample_freq,
446 &sample_period, &attr.sample_type,
447 &attr.read_format, &disabled, &inherit,
448 &pinned, &exclusive, &exclude_user,
449 &exclude_kernel, &exclude_hv, &exclude_idle,
450 &mmap, &comm, &freq, &inherit_stat,
451 &enable_on_exec, &task, &watermark,
452 &precise_ip, &mmap_data, &sample_id_all,
453 &attr.wakeup_events, &attr.bp_type,
454 &attr.bp_addr, &attr.bp_len, &idx))
455 return -1;
456
457 /* union... */
458 if (sample_period != 0) {
459 if (attr.sample_freq != 0)
460 return -1; /* FIXME: throw right exception */
461 attr.sample_period = sample_period;
462 }
463
464 /* Bitfields */
465 attr.disabled = disabled;
466 attr.inherit = inherit;
467 attr.pinned = pinned;
468 attr.exclusive = exclusive;
469 attr.exclude_user = exclude_user;
470 attr.exclude_kernel = exclude_kernel;
471 attr.exclude_hv = exclude_hv;
472 attr.exclude_idle = exclude_idle;
473 attr.mmap = mmap;
474 attr.comm = comm;
475 attr.freq = freq;
476 attr.inherit_stat = inherit_stat;
477 attr.enable_on_exec = enable_on_exec;
478 attr.task = task;
479 attr.watermark = watermark;
480 attr.precise_ip = precise_ip;
481 attr.mmap_data = mmap_data;
482 attr.sample_id_all = sample_id_all;
483
484 perf_evsel__init(&pevsel->evsel, &attr, idx);
485 return 0;
486}
487
488static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
489{
490 perf_evsel__exit(&pevsel->evsel);
491 pevsel->ob_type->tp_free((PyObject*)pevsel);
492}
493
494static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
495 PyObject *args, PyObject *kwargs)
496{
497 struct perf_evsel *evsel = &pevsel->evsel;
498 struct cpu_map *cpus = NULL;
499 struct thread_map *threads = NULL;
500 PyObject *pcpus = NULL, *pthreads = NULL;
501 int group = 0, overwrite = 0;
502 static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL};
503
504 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
505 &pcpus, &pthreads, &group, &overwrite))
506 return NULL;
507
508 if (pthreads != NULL)
509 threads = ((struct pyrf_thread_map *)pthreads)->threads;
510
511 if (pcpus != NULL)
512 cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
513
514 if (perf_evsel__open(evsel, cpus, threads, group, overwrite) < 0) {
515 PyErr_SetFromErrno(PyExc_OSError);
516 return NULL;
517 }
518
519 Py_INCREF(Py_None);
520 return Py_None;
521}
522
523static PyMethodDef pyrf_evsel__methods[] = {
524 {
525 .ml_name = "open",
526 .ml_meth = (PyCFunction)pyrf_evsel__open,
527 .ml_flags = METH_VARARGS | METH_KEYWORDS,
528 .ml_doc = PyDoc_STR("open the event selector file descriptor table.")
529 },
530 { .ml_name = NULL, }
531};
532
533static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
534
535static PyTypeObject pyrf_evsel__type = {
536 PyVarObject_HEAD_INIT(NULL, 0)
537 .tp_name = "perf.evsel",
538 .tp_basicsize = sizeof(struct pyrf_evsel),
539 .tp_dealloc = (destructor)pyrf_evsel__delete,
540 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
541 .tp_doc = pyrf_evsel__doc,
542 .tp_methods = pyrf_evsel__methods,
543 .tp_init = (initproc)pyrf_evsel__init,
544};
545
546static int pyrf_evsel__setup_types(void)
547{
548 pyrf_evsel__type.tp_new = PyType_GenericNew;
549 return PyType_Ready(&pyrf_evsel__type);
550}
551
552struct pyrf_evlist {
553 PyObject_HEAD
554
555 struct perf_evlist evlist;
556};
557
558static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
559 PyObject *args, PyObject *kwargs __used)
560{
561 PyObject *pcpus = NULL, *pthreads = NULL;
562 struct cpu_map *cpus;
563 struct thread_map *threads;
564
565 if (!PyArg_ParseTuple(args, "OO", &pcpus, &pthreads))
566 return -1;
567
568 threads = ((struct pyrf_thread_map *)pthreads)->threads;
569 cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
570 perf_evlist__init(&pevlist->evlist, cpus, threads);
571 return 0;
572}
573
574static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
575{
576 perf_evlist__exit(&pevlist->evlist);
577 pevlist->ob_type->tp_free((PyObject*)pevlist);
578}
579
580static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
581 PyObject *args, PyObject *kwargs)
582{
583 struct perf_evlist *evlist = &pevlist->evlist;
584 static char *kwlist[] = {"pages", "overwrite",
585 NULL, NULL};
586 int pages = 128, overwrite = false;
587
588 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
589 &pages, &overwrite))
590 return NULL;
591
592 if (perf_evlist__mmap(evlist, pages, overwrite) < 0) {
593 PyErr_SetFromErrno(PyExc_OSError);
594 return NULL;
595 }
596
597 Py_INCREF(Py_None);
598 return Py_None;
599}
600
601static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
602 PyObject *args, PyObject *kwargs)
603{
604 struct perf_evlist *evlist = &pevlist->evlist;
605 static char *kwlist[] = {"timeout", NULL, NULL};
606 int timeout = -1, n;
607
608 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
609 return NULL;
610
611 n = poll(evlist->pollfd, evlist->nr_fds, timeout);
612 if (n < 0) {
613 PyErr_SetFromErrno(PyExc_OSError);
614 return NULL;
615 }
616
617 return Py_BuildValue("i", n);
618}
619
620static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
621 PyObject *args __used, PyObject *kwargs __used)
622{
623 struct perf_evlist *evlist = &pevlist->evlist;
624 PyObject *list = PyList_New(0);
625 int i;
626
627 for (i = 0; i < evlist->nr_fds; ++i) {
628 PyObject *file;
629 FILE *fp = fdopen(evlist->pollfd[i].fd, "r");
630
631 if (fp == NULL)
632 goto free_list;
633
634 file = PyFile_FromFile(fp, "perf", "r", NULL);
635 if (file == NULL)
636 goto free_list;
637
638 if (PyList_Append(list, file) != 0) {
639 Py_DECREF(file);
640 goto free_list;
641 }
642
643 Py_DECREF(file);
644 }
645
646 return list;
647free_list:
648 return PyErr_NoMemory();
649}
650
651
652static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
653 PyObject *args, PyObject *kwargs __used)
654{
655 struct perf_evlist *evlist = &pevlist->evlist;
656 PyObject *pevsel;
657 struct perf_evsel *evsel;
658
659 if (!PyArg_ParseTuple(args, "O", &pevsel))
660 return NULL;
661
662 Py_INCREF(pevsel);
663 evsel = &((struct pyrf_evsel *)pevsel)->evsel;
664 evsel->idx = evlist->nr_entries;
665 perf_evlist__add(evlist, evsel);
666
667 return Py_BuildValue("i", evlist->nr_entries);
668}
669
670static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
671 PyObject *args, PyObject *kwargs)
672{
673 struct perf_evlist *evlist = &pevlist->evlist;
674 union perf_event *event;
675 int sample_id_all = 1, cpu;
676 static char *kwlist[] = {"sample_id_all", NULL, NULL};
677
678 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
679 &cpu, &sample_id_all))
680 return NULL;
681
682 event = perf_evlist__read_on_cpu(evlist, cpu);
683 if (event != NULL) {
684 struct perf_evsel *first;
685 PyObject *pyevent = pyrf_event__new(event);
686 struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
687
688 if (pyevent == NULL)
689 return PyErr_NoMemory();
690
691 first = list_entry(evlist->entries.next, struct perf_evsel, node);
692 perf_event__parse_sample(event, first->attr.sample_type, sample_id_all,
693 &pevent->sample);
694 return pyevent;
695 }
696
697 Py_INCREF(Py_None);
698 return Py_None;
699}
700
701static PyMethodDef pyrf_evlist__methods[] = {
702 {
703 .ml_name = "mmap",
704 .ml_meth = (PyCFunction)pyrf_evlist__mmap,
705 .ml_flags = METH_VARARGS | METH_KEYWORDS,
706 .ml_doc = PyDoc_STR("mmap the file descriptor table.")
707 },
708 {
709 .ml_name = "poll",
710 .ml_meth = (PyCFunction)pyrf_evlist__poll,
711 .ml_flags = METH_VARARGS | METH_KEYWORDS,
712 .ml_doc = PyDoc_STR("poll the file descriptor table.")
713 },
714 {
715 .ml_name = "get_pollfd",
716 .ml_meth = (PyCFunction)pyrf_evlist__get_pollfd,
717 .ml_flags = METH_VARARGS | METH_KEYWORDS,
718 .ml_doc = PyDoc_STR("get the poll file descriptor table.")
719 },
720 {
721 .ml_name = "add",
722 .ml_meth = (PyCFunction)pyrf_evlist__add,
723 .ml_flags = METH_VARARGS | METH_KEYWORDS,
724 .ml_doc = PyDoc_STR("adds an event selector to the list.")
725 },
726 {
727 .ml_name = "read_on_cpu",
728 .ml_meth = (PyCFunction)pyrf_evlist__read_on_cpu,
729 .ml_flags = METH_VARARGS | METH_KEYWORDS,
730 .ml_doc = PyDoc_STR("reads an event.")
731 },
732 { .ml_name = NULL, }
733};
734
735static Py_ssize_t pyrf_evlist__length(PyObject *obj)
736{
737 struct pyrf_evlist *pevlist = (void *)obj;
738
739 return pevlist->evlist.nr_entries;
740}
741
742static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
743{
744 struct pyrf_evlist *pevlist = (void *)obj;
745 struct perf_evsel *pos;
746
747 if (i >= pevlist->evlist.nr_entries)
748 return NULL;
749
750 list_for_each_entry(pos, &pevlist->evlist.entries, node)
751 if (i-- == 0)
752 break;
753
754 return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
755}
756
757static PySequenceMethods pyrf_evlist__sequence_methods = {
758 .sq_length = pyrf_evlist__length,
759 .sq_item = pyrf_evlist__item,
760};
761
762static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
763
764static PyTypeObject pyrf_evlist__type = {
765 PyVarObject_HEAD_INIT(NULL, 0)
766 .tp_name = "perf.evlist",
767 .tp_basicsize = sizeof(struct pyrf_evlist),
768 .tp_dealloc = (destructor)pyrf_evlist__delete,
769 .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
770 .tp_as_sequence = &pyrf_evlist__sequence_methods,
771 .tp_doc = pyrf_evlist__doc,
772 .tp_methods = pyrf_evlist__methods,
773 .tp_init = (initproc)pyrf_evlist__init,
774};
775
776static int pyrf_evlist__setup_types(void)
777{
778 pyrf_evlist__type.tp_new = PyType_GenericNew;
779 return PyType_Ready(&pyrf_evlist__type);
780}
781
782static struct {
783 const char *name;
784 int value;
785} perf__constants[] = {
786 { "TYPE_HARDWARE", PERF_TYPE_HARDWARE },
787 { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE },
788 { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT },
789 { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE },
790 { "TYPE_RAW", PERF_TYPE_RAW },
791 { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT },
792
793 { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES },
794 { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS },
795 { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES },
796 { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES },
797 { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
798 { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES },
799 { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES },
800 { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D },
801 { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I },
802 { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL },
803 { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB },
804 { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB },
805 { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU },
806 { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ },
807 { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE },
808 { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH },
809 { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS },
810 { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS },
811
812 { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK },
813 { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK },
814 { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS },
815 { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES },
816 { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS },
817 { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN },
818 { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ },
819 { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS },
820 { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS },
821
822 { "SAMPLE_IP", PERF_SAMPLE_IP },
823 { "SAMPLE_TID", PERF_SAMPLE_TID },
824 { "SAMPLE_TIME", PERF_SAMPLE_TIME },
825 { "SAMPLE_ADDR", PERF_SAMPLE_ADDR },
826 { "SAMPLE_READ", PERF_SAMPLE_READ },
827 { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN },
828 { "SAMPLE_ID", PERF_SAMPLE_ID },
829 { "SAMPLE_CPU", PERF_SAMPLE_CPU },
830 { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD },
831 { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID },
832 { "SAMPLE_RAW", PERF_SAMPLE_RAW },
833
834 { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED },
835 { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING },
836 { "FORMAT_ID", PERF_FORMAT_ID },
837 { "FORMAT_GROUP", PERF_FORMAT_GROUP },
838
839 { "RECORD_MMAP", PERF_RECORD_MMAP },
840 { "RECORD_LOST", PERF_RECORD_LOST },
841 { "RECORD_COMM", PERF_RECORD_COMM },
842 { "RECORD_EXIT", PERF_RECORD_EXIT },
843 { "RECORD_THROTTLE", PERF_RECORD_THROTTLE },
844 { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE },
845 { "RECORD_FORK", PERF_RECORD_FORK },
846 { "RECORD_READ", PERF_RECORD_READ },
847 { "RECORD_SAMPLE", PERF_RECORD_SAMPLE },
848 { .name = NULL, },
849};
850
851static PyMethodDef perf__methods[] = {
852 { .ml_name = NULL, }
853};
854
855PyMODINIT_FUNC initperf(void)
856{
857 PyObject *obj;
858 int i;
859 PyObject *dict, *module = Py_InitModule("perf", perf__methods);
860
861 if (module == NULL ||
862 pyrf_event__setup_types() < 0 ||
863 pyrf_evlist__setup_types() < 0 ||
864 pyrf_evsel__setup_types() < 0 ||
865 pyrf_thread_map__setup_types() < 0 ||
866 pyrf_cpu_map__setup_types() < 0)
867 return;
868
869 Py_INCREF(&pyrf_evlist__type);
870 PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
871
872 Py_INCREF(&pyrf_evsel__type);
873 PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type);
874
875 Py_INCREF(&pyrf_thread_map__type);
876 PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type);
877
878 Py_INCREF(&pyrf_cpu_map__type);
879 PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
880
881 dict = PyModule_GetDict(module);
882 if (dict == NULL)
883 goto error;
884
885 for (i = 0; perf__constants[i].name != NULL; i++) {
886 obj = PyInt_FromLong(perf__constants[i].value);
887 if (obj == NULL)
888 goto error;
889 PyDict_SetItemString(dict, perf__constants[i].name, obj);
890 Py_DECREF(obj);
891 }
892
893error:
894 if (PyErr_Occurred())
895 PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
896}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index c6d99334bdfa..2040b8538527 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -248,8 +248,7 @@ static void python_process_event(int cpu, void *data,
248 context = PyCObject_FromVoidPtr(scripting_context, NULL); 248 context = PyCObject_FromVoidPtr(scripting_context, NULL);
249 249
250 PyTuple_SetItem(t, n++, PyString_FromString(handler_name)); 250 PyTuple_SetItem(t, n++, PyString_FromString(handler_name));
251 PyTuple_SetItem(t, n++, 251 PyTuple_SetItem(t, n++, context);
252 PyCObject_FromVoidPtr(scripting_context, NULL));
253 252
254 if (handler) { 253 if (handler) {
255 PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); 254 PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 105f00bfd555..f26639fa0fb3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -7,6 +7,8 @@
7#include <sys/types.h> 7#include <sys/types.h>
8#include <sys/mman.h> 8#include <sys/mman.h>
9 9
10#include "evlist.h"
11#include "evsel.h"
10#include "session.h" 12#include "session.h"
11#include "sort.h" 13#include "sort.h"
12#include "util.h" 14#include "util.h"
@@ -19,7 +21,7 @@ static int perf_session__open(struct perf_session *self, bool force)
19 self->fd_pipe = true; 21 self->fd_pipe = true;
20 self->fd = STDIN_FILENO; 22 self->fd = STDIN_FILENO;
21 23
22 if (perf_header__read(self, self->fd) < 0) 24 if (perf_session__read_header(self, self->fd) < 0)
23 pr_err("incompatible file format"); 25 pr_err("incompatible file format");
24 26
25 return 0; 27 return 0;
@@ -51,7 +53,7 @@ static int perf_session__open(struct perf_session *self, bool force)
51 goto out_close; 53 goto out_close;
52 } 54 }
53 55
54 if (perf_header__read(self, self->fd) < 0) { 56 if (perf_session__read_header(self, self->fd) < 0) {
55 pr_err("incompatible file format"); 57 pr_err("incompatible file format");
56 goto out_close; 58 goto out_close;
57 } 59 }
@@ -67,7 +69,7 @@ out_close:
67 69
68static void perf_session__id_header_size(struct perf_session *session) 70static void perf_session__id_header_size(struct perf_session *session)
69{ 71{
70 struct sample_data *data; 72 struct perf_sample *data;
71 u64 sample_type = session->sample_type; 73 u64 sample_type = session->sample_type;
72 u16 size = 0; 74 u16 size = 0;
73 75
@@ -92,21 +94,10 @@ out:
92 session->id_hdr_size = size; 94 session->id_hdr_size = size;
93} 95}
94 96
95void perf_session__set_sample_id_all(struct perf_session *session, bool value)
96{
97 session->sample_id_all = value;
98 perf_session__id_header_size(session);
99}
100
101void perf_session__set_sample_type(struct perf_session *session, u64 type)
102{
103 session->sample_type = type;
104}
105
106void perf_session__update_sample_type(struct perf_session *self) 97void perf_session__update_sample_type(struct perf_session *self)
107{ 98{
108 self->sample_type = perf_header__sample_type(&self->header); 99 self->sample_type = perf_evlist__sample_type(self->evlist);
109 self->sample_id_all = perf_header__sample_id_all(&self->header); 100 self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
110 perf_session__id_header_size(self); 101 perf_session__id_header_size(self);
111} 102}
112 103
@@ -135,13 +126,9 @@ struct perf_session *perf_session__new(const char *filename, int mode,
135 if (self == NULL) 126 if (self == NULL)
136 goto out; 127 goto out;
137 128
138 if (perf_header__init(&self->header) < 0)
139 goto out_free;
140
141 memcpy(self->filename, filename, len); 129 memcpy(self->filename, filename, len);
142 self->threads = RB_ROOT; 130 self->threads = RB_ROOT;
143 INIT_LIST_HEAD(&self->dead_threads); 131 INIT_LIST_HEAD(&self->dead_threads);
144 self->hists_tree = RB_ROOT;
145 self->last_match = NULL; 132 self->last_match = NULL;
146 /* 133 /*
147 * On 64bit we can mmap the data file in one go. No need for tiny mmap 134 * On 64bit we can mmap the data file in one go. No need for tiny mmap
@@ -162,17 +149,16 @@ struct perf_session *perf_session__new(const char *filename, int mode,
162 if (mode == O_RDONLY) { 149 if (mode == O_RDONLY) {
163 if (perf_session__open(self, force) < 0) 150 if (perf_session__open(self, force) < 0)
164 goto out_delete; 151 goto out_delete;
152 perf_session__update_sample_type(self);
165 } else if (mode == O_WRONLY) { 153 } else if (mode == O_WRONLY) {
166 /* 154 /*
167 * In O_RDONLY mode this will be performed when reading the 155 * In O_RDONLY mode this will be performed when reading the
168 * kernel MMAP event, in event__process_mmap(). 156 * kernel MMAP event, in perf_event__process_mmap().
169 */ 157 */
170 if (perf_session__create_kernel_maps(self) < 0) 158 if (perf_session__create_kernel_maps(self) < 0)
171 goto out_delete; 159 goto out_delete;
172 } 160 }
173 161
174 perf_session__update_sample_type(self);
175
176 if (ops && ops->ordering_requires_timestamps && 162 if (ops && ops->ordering_requires_timestamps &&
177 ops->ordered_samples && !self->sample_id_all) { 163 ops->ordered_samples && !self->sample_id_all) {
178 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); 164 dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
@@ -181,9 +167,6 @@ struct perf_session *perf_session__new(const char *filename, int mode,
181 167
182out: 168out:
183 return self; 169 return self;
184out_free:
185 free(self);
186 return NULL;
187out_delete: 170out_delete:
188 perf_session__delete(self); 171 perf_session__delete(self);
189 return NULL; 172 return NULL;
@@ -214,7 +197,6 @@ static void perf_session__delete_threads(struct perf_session *self)
214 197
215void perf_session__delete(struct perf_session *self) 198void perf_session__delete(struct perf_session *self)
216{ 199{
217 perf_header__exit(&self->header);
218 perf_session__destroy_kernel_maps(self); 200 perf_session__destroy_kernel_maps(self);
219 perf_session__delete_dead_threads(self); 201 perf_session__delete_dead_threads(self);
220 perf_session__delete_threads(self); 202 perf_session__delete_threads(self);
@@ -242,17 +224,16 @@ static bool symbol__match_parent_regex(struct symbol *sym)
242 return 0; 224 return 0;
243} 225}
244 226
245struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, 227int perf_session__resolve_callchain(struct perf_session *self,
246 struct thread *thread, 228 struct thread *thread,
247 struct ip_callchain *chain, 229 struct ip_callchain *chain,
248 struct symbol **parent) 230 struct symbol **parent)
249{ 231{
250 u8 cpumode = PERF_RECORD_MISC_USER; 232 u8 cpumode = PERF_RECORD_MISC_USER;
251 unsigned int i; 233 unsigned int i;
252 struct map_symbol *syms = calloc(chain->nr, sizeof(*syms)); 234 int err;
253 235
254 if (!syms) 236 callchain_cursor_reset(&self->callchain_cursor);
255 return NULL;
256 237
257 for (i = 0; i < chain->nr; i++) { 238 for (i = 0; i < chain->nr; i++) {
258 u64 ip = chain->ips[i]; 239 u64 ip = chain->ips[i];
@@ -281,30 +262,33 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
281 *parent = al.sym; 262 *parent = al.sym;
282 if (!symbol_conf.use_callchain) 263 if (!symbol_conf.use_callchain)
283 break; 264 break;
284 syms[i].map = al.map;
285 syms[i].sym = al.sym;
286 } 265 }
266
267 err = callchain_cursor_append(&self->callchain_cursor,
268 ip, al.map, al.sym);
269 if (err)
270 return err;
287 } 271 }
288 272
289 return syms; 273 return 0;
290} 274}
291 275
292static int process_event_synth_stub(event_t *event __used, 276static int process_event_synth_stub(union perf_event *event __used,
293 struct perf_session *session __used) 277 struct perf_session *session __used)
294{ 278{
295 dump_printf(": unhandled!\n"); 279 dump_printf(": unhandled!\n");
296 return 0; 280 return 0;
297} 281}
298 282
299static int process_event_stub(event_t *event __used, 283static int process_event_stub(union perf_event *event __used,
300 struct sample_data *sample __used, 284 struct perf_sample *sample __used,
301 struct perf_session *session __used) 285 struct perf_session *session __used)
302{ 286{
303 dump_printf(": unhandled!\n"); 287 dump_printf(": unhandled!\n");
304 return 0; 288 return 0;
305} 289}
306 290
307static int process_finished_round_stub(event_t *event __used, 291static int process_finished_round_stub(union perf_event *event __used,
308 struct perf_session *session __used, 292 struct perf_session *session __used,
309 struct perf_event_ops *ops __used) 293 struct perf_event_ops *ops __used)
310{ 294{
@@ -312,7 +296,7 @@ static int process_finished_round_stub(event_t *event __used,
312 return 0; 296 return 0;
313} 297}
314 298
315static int process_finished_round(event_t *event, 299static int process_finished_round(union perf_event *event,
316 struct perf_session *session, 300 struct perf_session *session,
317 struct perf_event_ops *ops); 301 struct perf_event_ops *ops);
318 302
@@ -329,7 +313,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
329 if (handler->exit == NULL) 313 if (handler->exit == NULL)
330 handler->exit = process_event_stub; 314 handler->exit = process_event_stub;
331 if (handler->lost == NULL) 315 if (handler->lost == NULL)
332 handler->lost = event__process_lost; 316 handler->lost = perf_event__process_lost;
333 if (handler->read == NULL) 317 if (handler->read == NULL)
334 handler->read = process_event_stub; 318 handler->read = process_event_stub;
335 if (handler->throttle == NULL) 319 if (handler->throttle == NULL)
@@ -363,98 +347,98 @@ void mem_bswap_64(void *src, int byte_size)
363 } 347 }
364} 348}
365 349
366static void event__all64_swap(event_t *self) 350static void perf_event__all64_swap(union perf_event *event)
367{ 351{
368 struct perf_event_header *hdr = &self->header; 352 struct perf_event_header *hdr = &event->header;
369 mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr)); 353 mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
370} 354}
371 355
372static void event__comm_swap(event_t *self) 356static void perf_event__comm_swap(union perf_event *event)
373{ 357{
374 self->comm.pid = bswap_32(self->comm.pid); 358 event->comm.pid = bswap_32(event->comm.pid);
375 self->comm.tid = bswap_32(self->comm.tid); 359 event->comm.tid = bswap_32(event->comm.tid);
376} 360}
377 361
378static void event__mmap_swap(event_t *self) 362static void perf_event__mmap_swap(union perf_event *event)
379{ 363{
380 self->mmap.pid = bswap_32(self->mmap.pid); 364 event->mmap.pid = bswap_32(event->mmap.pid);
381 self->mmap.tid = bswap_32(self->mmap.tid); 365 event->mmap.tid = bswap_32(event->mmap.tid);
382 self->mmap.start = bswap_64(self->mmap.start); 366 event->mmap.start = bswap_64(event->mmap.start);
383 self->mmap.len = bswap_64(self->mmap.len); 367 event->mmap.len = bswap_64(event->mmap.len);
384 self->mmap.pgoff = bswap_64(self->mmap.pgoff); 368 event->mmap.pgoff = bswap_64(event->mmap.pgoff);
385} 369}
386 370
387static void event__task_swap(event_t *self) 371static void perf_event__task_swap(union perf_event *event)
388{ 372{
389 self->fork.pid = bswap_32(self->fork.pid); 373 event->fork.pid = bswap_32(event->fork.pid);
390 self->fork.tid = bswap_32(self->fork.tid); 374 event->fork.tid = bswap_32(event->fork.tid);
391 self->fork.ppid = bswap_32(self->fork.ppid); 375 event->fork.ppid = bswap_32(event->fork.ppid);
392 self->fork.ptid = bswap_32(self->fork.ptid); 376 event->fork.ptid = bswap_32(event->fork.ptid);
393 self->fork.time = bswap_64(self->fork.time); 377 event->fork.time = bswap_64(event->fork.time);
394} 378}
395 379
396static void event__read_swap(event_t *self) 380static void perf_event__read_swap(union perf_event *event)
397{ 381{
398 self->read.pid = bswap_32(self->read.pid); 382 event->read.pid = bswap_32(event->read.pid);
399 self->read.tid = bswap_32(self->read.tid); 383 event->read.tid = bswap_32(event->read.tid);
400 self->read.value = bswap_64(self->read.value); 384 event->read.value = bswap_64(event->read.value);
401 self->read.time_enabled = bswap_64(self->read.time_enabled); 385 event->read.time_enabled = bswap_64(event->read.time_enabled);
402 self->read.time_running = bswap_64(self->read.time_running); 386 event->read.time_running = bswap_64(event->read.time_running);
403 self->read.id = bswap_64(self->read.id); 387 event->read.id = bswap_64(event->read.id);
404} 388}
405 389
406static void event__attr_swap(event_t *self) 390static void perf_event__attr_swap(union perf_event *event)
407{ 391{
408 size_t size; 392 size_t size;
409 393
410 self->attr.attr.type = bswap_32(self->attr.attr.type); 394 event->attr.attr.type = bswap_32(event->attr.attr.type);
411 self->attr.attr.size = bswap_32(self->attr.attr.size); 395 event->attr.attr.size = bswap_32(event->attr.attr.size);
412 self->attr.attr.config = bswap_64(self->attr.attr.config); 396 event->attr.attr.config = bswap_64(event->attr.attr.config);
413 self->attr.attr.sample_period = bswap_64(self->attr.attr.sample_period); 397 event->attr.attr.sample_period = bswap_64(event->attr.attr.sample_period);
414 self->attr.attr.sample_type = bswap_64(self->attr.attr.sample_type); 398 event->attr.attr.sample_type = bswap_64(event->attr.attr.sample_type);
415 self->attr.attr.read_format = bswap_64(self->attr.attr.read_format); 399 event->attr.attr.read_format = bswap_64(event->attr.attr.read_format);
416 self->attr.attr.wakeup_events = bswap_32(self->attr.attr.wakeup_events); 400 event->attr.attr.wakeup_events = bswap_32(event->attr.attr.wakeup_events);
417 self->attr.attr.bp_type = bswap_32(self->attr.attr.bp_type); 401 event->attr.attr.bp_type = bswap_32(event->attr.attr.bp_type);
418 self->attr.attr.bp_addr = bswap_64(self->attr.attr.bp_addr); 402 event->attr.attr.bp_addr = bswap_64(event->attr.attr.bp_addr);
419 self->attr.attr.bp_len = bswap_64(self->attr.attr.bp_len); 403 event->attr.attr.bp_len = bswap_64(event->attr.attr.bp_len);
420 404
421 size = self->header.size; 405 size = event->header.size;
422 size -= (void *)&self->attr.id - (void *)self; 406 size -= (void *)&event->attr.id - (void *)event;
423 mem_bswap_64(self->attr.id, size); 407 mem_bswap_64(event->attr.id, size);
424} 408}
425 409
426static void event__event_type_swap(event_t *self) 410static void perf_event__event_type_swap(union perf_event *event)
427{ 411{
428 self->event_type.event_type.event_id = 412 event->event_type.event_type.event_id =
429 bswap_64(self->event_type.event_type.event_id); 413 bswap_64(event->event_type.event_type.event_id);
430} 414}
431 415
432static void event__tracing_data_swap(event_t *self) 416static void perf_event__tracing_data_swap(union perf_event *event)
433{ 417{
434 self->tracing_data.size = bswap_32(self->tracing_data.size); 418 event->tracing_data.size = bswap_32(event->tracing_data.size);
435} 419}
436 420
437typedef void (*event__swap_op)(event_t *self); 421typedef void (*perf_event__swap_op)(union perf_event *event);
438 422
439static event__swap_op event__swap_ops[] = { 423static perf_event__swap_op perf_event__swap_ops[] = {
440 [PERF_RECORD_MMAP] = event__mmap_swap, 424 [PERF_RECORD_MMAP] = perf_event__mmap_swap,
441 [PERF_RECORD_COMM] = event__comm_swap, 425 [PERF_RECORD_COMM] = perf_event__comm_swap,
442 [PERF_RECORD_FORK] = event__task_swap, 426 [PERF_RECORD_FORK] = perf_event__task_swap,
443 [PERF_RECORD_EXIT] = event__task_swap, 427 [PERF_RECORD_EXIT] = perf_event__task_swap,
444 [PERF_RECORD_LOST] = event__all64_swap, 428 [PERF_RECORD_LOST] = perf_event__all64_swap,
445 [PERF_RECORD_READ] = event__read_swap, 429 [PERF_RECORD_READ] = perf_event__read_swap,
446 [PERF_RECORD_SAMPLE] = event__all64_swap, 430 [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
447 [PERF_RECORD_HEADER_ATTR] = event__attr_swap, 431 [PERF_RECORD_HEADER_ATTR] = perf_event__attr_swap,
448 [PERF_RECORD_HEADER_EVENT_TYPE] = event__event_type_swap, 432 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
449 [PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap, 433 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
450 [PERF_RECORD_HEADER_BUILD_ID] = NULL, 434 [PERF_RECORD_HEADER_BUILD_ID] = NULL,
451 [PERF_RECORD_HEADER_MAX] = NULL, 435 [PERF_RECORD_HEADER_MAX] = NULL,
452}; 436};
453 437
454struct sample_queue { 438struct sample_queue {
455 u64 timestamp; 439 u64 timestamp;
456 u64 file_offset; 440 u64 file_offset;
457 event_t *event; 441 union perf_event *event;
458 struct list_head list; 442 struct list_head list;
459}; 443};
460 444
@@ -472,8 +456,8 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
472} 456}
473 457
474static int perf_session_deliver_event(struct perf_session *session, 458static int perf_session_deliver_event(struct perf_session *session,
475 event_t *event, 459 union perf_event *event,
476 struct sample_data *sample, 460 struct perf_sample *sample,
477 struct perf_event_ops *ops, 461 struct perf_event_ops *ops,
478 u64 file_offset); 462 u64 file_offset);
479 463
@@ -483,7 +467,7 @@ static void flush_sample_queue(struct perf_session *s,
483 struct ordered_samples *os = &s->ordered_samples; 467 struct ordered_samples *os = &s->ordered_samples;
484 struct list_head *head = &os->samples; 468 struct list_head *head = &os->samples;
485 struct sample_queue *tmp, *iter; 469 struct sample_queue *tmp, *iter;
486 struct sample_data sample; 470 struct perf_sample sample;
487 u64 limit = os->next_flush; 471 u64 limit = os->next_flush;
488 u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; 472 u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
489 473
@@ -494,7 +478,7 @@ static void flush_sample_queue(struct perf_session *s,
494 if (iter->timestamp > limit) 478 if (iter->timestamp > limit)
495 break; 479 break;
496 480
497 event__parse_sample(iter->event, s, &sample); 481 perf_session__parse_sample(s, iter->event, &sample);
498 perf_session_deliver_event(s, iter->event, &sample, ops, 482 perf_session_deliver_event(s, iter->event, &sample, ops,
499 iter->file_offset); 483 iter->file_offset);
500 484
@@ -550,7 +534,7 @@ static void flush_sample_queue(struct perf_session *s,
550 * Flush every events below timestamp 7 534 * Flush every events below timestamp 7
551 * etc... 535 * etc...
552 */ 536 */
553static int process_finished_round(event_t *event __used, 537static int process_finished_round(union perf_event *event __used,
554 struct perf_session *session, 538 struct perf_session *session,
555 struct perf_event_ops *ops) 539 struct perf_event_ops *ops)
556{ 540{
@@ -607,12 +591,12 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s)
607 591
608#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue)) 592#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
609 593
610static int perf_session_queue_event(struct perf_session *s, event_t *event, 594static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
611 struct sample_data *data, u64 file_offset) 595 struct perf_sample *sample, u64 file_offset)
612{ 596{
613 struct ordered_samples *os = &s->ordered_samples; 597 struct ordered_samples *os = &s->ordered_samples;
614 struct list_head *sc = &os->sample_cache; 598 struct list_head *sc = &os->sample_cache;
615 u64 timestamp = data->time; 599 u64 timestamp = sample->time;
616 struct sample_queue *new; 600 struct sample_queue *new;
617 601
618 if (!timestamp || timestamp == ~0ULL) 602 if (!timestamp || timestamp == ~0ULL)
@@ -648,7 +632,7 @@ static int perf_session_queue_event(struct perf_session *s, event_t *event,
648 return 0; 632 return 0;
649} 633}
650 634
651static void callchain__printf(struct sample_data *sample) 635static void callchain__printf(struct perf_sample *sample)
652{ 636{
653 unsigned int i; 637 unsigned int i;
654 638
@@ -660,8 +644,8 @@ static void callchain__printf(struct sample_data *sample)
660} 644}
661 645
662static void perf_session__print_tstamp(struct perf_session *session, 646static void perf_session__print_tstamp(struct perf_session *session,
663 event_t *event, 647 union perf_event *event,
664 struct sample_data *sample) 648 struct perf_sample *sample)
665{ 649{
666 if (event->header.type != PERF_RECORD_SAMPLE && 650 if (event->header.type != PERF_RECORD_SAMPLE &&
667 !session->sample_id_all) { 651 !session->sample_id_all) {
@@ -676,8 +660,8 @@ static void perf_session__print_tstamp(struct perf_session *session,
676 printf("%" PRIu64 " ", sample->time); 660 printf("%" PRIu64 " ", sample->time);
677} 661}
678 662
679static void dump_event(struct perf_session *session, event_t *event, 663static void dump_event(struct perf_session *session, union perf_event *event,
680 u64 file_offset, struct sample_data *sample) 664 u64 file_offset, struct perf_sample *sample)
681{ 665{
682 if (!dump_trace) 666 if (!dump_trace)
683 return; 667 return;
@@ -691,11 +675,11 @@ static void dump_event(struct perf_session *session, event_t *event,
691 perf_session__print_tstamp(session, event, sample); 675 perf_session__print_tstamp(session, event, sample);
692 676
693 printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, 677 printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
694 event->header.size, event__get_event_name(event->header.type)); 678 event->header.size, perf_event__name(event->header.type));
695} 679}
696 680
697static void dump_sample(struct perf_session *session, event_t *event, 681static void dump_sample(struct perf_session *session, union perf_event *event,
698 struct sample_data *sample) 682 struct perf_sample *sample)
699{ 683{
700 if (!dump_trace) 684 if (!dump_trace)
701 return; 685 return;
@@ -709,8 +693,8 @@ static void dump_sample(struct perf_session *session, event_t *event,
709} 693}
710 694
711static int perf_session_deliver_event(struct perf_session *session, 695static int perf_session_deliver_event(struct perf_session *session,
712 event_t *event, 696 union perf_event *event,
713 struct sample_data *sample, 697 struct perf_sample *sample,
714 struct perf_event_ops *ops, 698 struct perf_event_ops *ops,
715 u64 file_offset) 699 u64 file_offset)
716{ 700{
@@ -743,7 +727,7 @@ static int perf_session_deliver_event(struct perf_session *session,
743} 727}
744 728
745static int perf_session__preprocess_sample(struct perf_session *session, 729static int perf_session__preprocess_sample(struct perf_session *session,
746 event_t *event, struct sample_data *sample) 730 union perf_event *event, struct perf_sample *sample)
747{ 731{
748 if (event->header.type != PERF_RECORD_SAMPLE || 732 if (event->header.type != PERF_RECORD_SAMPLE ||
749 !(session->sample_type & PERF_SAMPLE_CALLCHAIN)) 733 !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
@@ -758,7 +742,7 @@ static int perf_session__preprocess_sample(struct perf_session *session,
758 return 0; 742 return 0;
759} 743}
760 744
761static int perf_session__process_user_event(struct perf_session *session, event_t *event, 745static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
762 struct perf_event_ops *ops, u64 file_offset) 746 struct perf_event_ops *ops, u64 file_offset)
763{ 747{
764 dump_event(session, event, file_offset, NULL); 748 dump_event(session, event, file_offset, NULL);
@@ -783,15 +767,16 @@ static int perf_session__process_user_event(struct perf_session *session, event_
783} 767}
784 768
785static int perf_session__process_event(struct perf_session *session, 769static int perf_session__process_event(struct perf_session *session,
786 event_t *event, 770 union perf_event *event,
787 struct perf_event_ops *ops, 771 struct perf_event_ops *ops,
788 u64 file_offset) 772 u64 file_offset)
789{ 773{
790 struct sample_data sample; 774 struct perf_sample sample;
791 int ret; 775 int ret;
792 776
793 if (session->header.needs_swap && event__swap_ops[event->header.type]) 777 if (session->header.needs_swap &&
794 event__swap_ops[event->header.type](event); 778 perf_event__swap_ops[event->header.type])
779 perf_event__swap_ops[event->header.type](event);
795 780
796 if (event->header.type >= PERF_RECORD_HEADER_MAX) 781 if (event->header.type >= PERF_RECORD_HEADER_MAX)
797 return -EINVAL; 782 return -EINVAL;
@@ -804,7 +789,7 @@ static int perf_session__process_event(struct perf_session *session,
804 /* 789 /*
805 * For all kernel events we get the sample data 790 * For all kernel events we get the sample data
806 */ 791 */
807 event__parse_sample(event, session, &sample); 792 perf_session__parse_sample(session, event, &sample);
808 793
809 /* Preprocess sample records - precheck callchains */ 794 /* Preprocess sample records - precheck callchains */
810 if (perf_session__preprocess_sample(session, event, &sample)) 795 if (perf_session__preprocess_sample(session, event, &sample))
@@ -843,7 +828,7 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
843static void perf_session__warn_about_errors(const struct perf_session *session, 828static void perf_session__warn_about_errors(const struct perf_session *session,
844 const struct perf_event_ops *ops) 829 const struct perf_event_ops *ops)
845{ 830{
846 if (ops->lost == event__process_lost && 831 if (ops->lost == perf_event__process_lost &&
847 session->hists.stats.total_lost != 0) { 832 session->hists.stats.total_lost != 0) {
848 ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64 833 ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
849 "!\n\nCheck IO/CPU overload!\n\n", 834 "!\n\nCheck IO/CPU overload!\n\n",
@@ -875,7 +860,7 @@ volatile int session_done;
875static int __perf_session__process_pipe_events(struct perf_session *self, 860static int __perf_session__process_pipe_events(struct perf_session *self,
876 struct perf_event_ops *ops) 861 struct perf_event_ops *ops)
877{ 862{
878 event_t event; 863 union perf_event event;
879 uint32_t size; 864 uint32_t size;
880 int skip = 0; 865 int skip = 0;
881 u64 head; 866 u64 head;
@@ -956,7 +941,7 @@ int __perf_session__process_events(struct perf_session *session,
956 struct ui_progress *progress; 941 struct ui_progress *progress;
957 size_t page_size, mmap_size; 942 size_t page_size, mmap_size;
958 char *buf, *mmaps[8]; 943 char *buf, *mmaps[8];
959 event_t *event; 944 union perf_event *event;
960 uint32_t size; 945 uint32_t size;
961 946
962 perf_event_ops__fill_defaults(ops); 947 perf_event_ops__fill_defaults(ops);
@@ -1001,7 +986,7 @@ remap:
1001 file_pos = file_offset + head; 986 file_pos = file_offset + head;
1002 987
1003more: 988more:
1004 event = (event_t *)(buf + head); 989 event = (union perf_event *)(buf + head);
1005 990
1006 if (session->header.needs_swap) 991 if (session->header.needs_swap)
1007 perf_event_header__bswap(&event->header); 992 perf_event_header__bswap(&event->header);
@@ -1134,3 +1119,18 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
1134 size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits); 1119 size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
1135 return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits); 1120 return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
1136} 1121}
1122
1123size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
1124{
1125 struct perf_evsel *pos;
1126 size_t ret = fprintf(fp, "Aggregated stats:\n");
1127
1128 ret += hists__fprintf_nr_events(&session->hists, fp);
1129
1130 list_for_each_entry(pos, &session->evlist->entries, node) {
1131 ret += fprintf(fp, "%s stats:\n", event_name(pos));
1132 ret += hists__fprintf_nr_events(&pos->hists, fp);
1133 }
1134
1135 return ret;
1136}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index decd83f274fd..b5b148b0aaca 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -34,12 +34,12 @@ struct perf_session {
34 struct thread *last_match; 34 struct thread *last_match;
35 struct machine host_machine; 35 struct machine host_machine;
36 struct rb_root machines; 36 struct rb_root machines;
37 struct rb_root hists_tree; 37 struct perf_evlist *evlist;
38 /* 38 /*
39 * FIXME: should point to the first entry in hists_tree and 39 * FIXME: Need to split this up further, we need global
40 * be a hists instance. Right now its only 'report' 40 * stats + per event stats. 'perf diff' also needs
41 * that is using ->hists_tree while all the rest use 41 * to properly support multiple events in a single
42 * ->hists. 42 * perf.data file.
43 */ 43 */
44 struct hists hists; 44 struct hists hists;
45 u64 sample_type; 45 u64 sample_type;
@@ -51,15 +51,17 @@ struct perf_session {
51 int cwdlen; 51 int cwdlen;
52 char *cwd; 52 char *cwd;
53 struct ordered_samples ordered_samples; 53 struct ordered_samples ordered_samples;
54 char filename[0]; 54 struct callchain_cursor callchain_cursor;
55 char filename[0];
55}; 56};
56 57
57struct perf_event_ops; 58struct perf_event_ops;
58 59
59typedef int (*event_op)(event_t *self, struct sample_data *sample, 60typedef int (*event_op)(union perf_event *self, struct perf_sample *sample,
60 struct perf_session *session); 61 struct perf_session *session);
61typedef int (*event_synth_op)(event_t *self, struct perf_session *session); 62typedef int (*event_synth_op)(union perf_event *self,
62typedef int (*event_op2)(event_t *self, struct perf_session *session, 63 struct perf_session *session);
64typedef int (*event_op2)(union perf_event *self, struct perf_session *session,
63 struct perf_event_ops *ops); 65 struct perf_event_ops *ops);
64 66
65struct perf_event_ops { 67struct perf_event_ops {
@@ -94,10 +96,10 @@ int __perf_session__process_events(struct perf_session *self,
94int perf_session__process_events(struct perf_session *self, 96int perf_session__process_events(struct perf_session *self,
95 struct perf_event_ops *event_ops); 97 struct perf_event_ops *event_ops);
96 98
97struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, 99int perf_session__resolve_callchain(struct perf_session *self,
98 struct thread *thread, 100 struct thread *thread,
99 struct ip_callchain *chain, 101 struct ip_callchain *chain,
100 struct symbol **parent); 102 struct symbol **parent);
101 103
102bool perf_session__has_traces(struct perf_session *self, const char *msg); 104bool perf_session__has_traces(struct perf_session *self, const char *msg);
103 105
@@ -110,8 +112,6 @@ void mem_bswap_64(void *src, int byte_size);
110int perf_session__create_kernel_maps(struct perf_session *self); 112int perf_session__create_kernel_maps(struct perf_session *self);
111 113
112void perf_session__update_sample_type(struct perf_session *self); 114void perf_session__update_sample_type(struct perf_session *self);
113void perf_session__set_sample_id_all(struct perf_session *session, bool value);
114void perf_session__set_sample_type(struct perf_session *session, u64 type);
115void perf_session__remove_thread(struct perf_session *self, struct thread *th); 115void perf_session__remove_thread(struct perf_session *self, struct thread *th);
116 116
117static inline 117static inline
@@ -149,9 +149,14 @@ size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);
149size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, 149size_t perf_session__fprintf_dsos_buildid(struct perf_session *self,
150 FILE *fp, bool with_hits); 150 FILE *fp, bool with_hits);
151 151
152static inline 152size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp);
153size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp) 153
154static inline int perf_session__parse_sample(struct perf_session *session,
155 const union perf_event *event,
156 struct perf_sample *sample)
154{ 157{
155 return hists__fprintf_nr_events(&self->hists, fp); 158 return perf_event__parse_sample(event, session->sample_type,
159 session->sample_id_all, sample);
156} 160}
161
157#endif /* __PERF_SESSION_H */ 162#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
new file mode 100644
index 000000000000..e24ffadb20b2
--- /dev/null
+++ b/tools/perf/util/setup.py
@@ -0,0 +1,19 @@
1#!/usr/bin/python2
2
3from distutils.core import setup, Extension
4
5perf = Extension('perf',
6 sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
7 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
8 'util/util.c', 'util/xyarray.c', 'util/cgroup.c'],
9 include_dirs = ['util/include'],
10 extra_compile_args = ['-fno-strict-aliasing', '-Wno-write-strings'])
11
12setup(name='perf',
13 version='0.1',
14 description='Interface with the Linux profiling infrastructure',
15 author='Arnaldo Carvalho de Melo',
16 author_email='acme@redhat.com',
17 license='GPLv2',
18 url='http://perf.wiki.kernel.org',
19 ext_modules=[perf])
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
new file mode 100644
index 000000000000..834c8ebfe38e
--- /dev/null
+++ b/tools/perf/util/strfilter.c
@@ -0,0 +1,199 @@
1#include "util.h"
2#include "string.h"
3#include "strfilter.h"
4
5/* Operators */
6static const char *OP_and = "&"; /* Logical AND */
7static const char *OP_or = "|"; /* Logical OR */
8static const char *OP_not = "!"; /* Logical NOT */
9
10#define is_operator(c) ((c) == '|' || (c) == '&' || (c) == '!')
11#define is_separator(c) (is_operator(c) || (c) == '(' || (c) == ')')
12
13static void strfilter_node__delete(struct strfilter_node *self)
14{
15 if (self) {
16 if (self->p && !is_operator(*self->p))
17 free((char *)self->p);
18 strfilter_node__delete(self->l);
19 strfilter_node__delete(self->r);
20 free(self);
21 }
22}
23
24void strfilter__delete(struct strfilter *self)
25{
26 if (self) {
27 strfilter_node__delete(self->root);
28 free(self);
29 }
30}
31
32static const char *get_token(const char *s, const char **e)
33{
34 const char *p;
35
36 while (isspace(*s)) /* Skip spaces */
37 s++;
38
39 if (*s == '\0') {
40 p = s;
41 goto end;
42 }
43
44 p = s + 1;
45 if (!is_separator(*s)) {
46 /* End search */
47retry:
48 while (*p && !is_separator(*p) && !isspace(*p))
49 p++;
50 /* Escape and special case: '!' is also used in glob pattern */
51 if (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '[')) {
52 p++;
53 goto retry;
54 }
55 }
56end:
57 *e = p;
58 return s;
59}
60
61static struct strfilter_node *strfilter_node__alloc(const char *op,
62 struct strfilter_node *l,
63 struct strfilter_node *r)
64{
65 struct strfilter_node *ret = zalloc(sizeof(struct strfilter_node));
66
67 if (ret) {
68 ret->p = op;
69 ret->l = l;
70 ret->r = r;
71 }
72
73 return ret;
74}
75
76static struct strfilter_node *strfilter_node__new(const char *s,
77 const char **ep)
78{
79 struct strfilter_node root, *cur, *last_op;
80 const char *e;
81
82 if (!s)
83 return NULL;
84
85 memset(&root, 0, sizeof(root));
86 last_op = cur = &root;
87
88 s = get_token(s, &e);
89 while (*s != '\0' && *s != ')') {
90 switch (*s) {
91 case '&': /* Exchg last OP->r with AND */
92 if (!cur->r || !last_op->r)
93 goto error;
94 cur = strfilter_node__alloc(OP_and, last_op->r, NULL);
95 if (!cur)
96 goto nomem;
97 last_op->r = cur;
98 last_op = cur;
99 break;
100 case '|': /* Exchg the root with OR */
101 if (!cur->r || !root.r)
102 goto error;
103 cur = strfilter_node__alloc(OP_or, root.r, NULL);
104 if (!cur)
105 goto nomem;
106 root.r = cur;
107 last_op = cur;
108 break;
109 case '!': /* Add NOT as a leaf node */
110 if (cur->r)
111 goto error;
112 cur->r = strfilter_node__alloc(OP_not, NULL, NULL);
113 if (!cur->r)
114 goto nomem;
115 cur = cur->r;
116 break;
117 case '(': /* Recursively parses inside the parenthesis */
118 if (cur->r)
119 goto error;
120 cur->r = strfilter_node__new(s + 1, &s);
121 if (!s)
122 goto nomem;
123 if (!cur->r || *s != ')')
124 goto error;
125 e = s + 1;
126 break;
127 default:
128 if (cur->r)
129 goto error;
130 cur->r = strfilter_node__alloc(NULL, NULL, NULL);
131 if (!cur->r)
132 goto nomem;
133 cur->r->p = strndup(s, e - s);
134 if (!cur->r->p)
135 goto nomem;
136 }
137 s = get_token(e, &e);
138 }
139 if (!cur->r)
140 goto error;
141 *ep = s;
142 return root.r;
143nomem:
144 s = NULL;
145error:
146 *ep = s;
147 strfilter_node__delete(root.r);
148 return NULL;
149}
150
151/*
152 * Parse filter rule and return new strfilter.
153 * Return NULL if fail, and *ep == NULL if memory allocation failed.
154 */
155struct strfilter *strfilter__new(const char *rules, const char **err)
156{
157 struct strfilter *ret = zalloc(sizeof(struct strfilter));
158 const char *ep = NULL;
159
160 if (ret)
161 ret->root = strfilter_node__new(rules, &ep);
162
163 if (!ret || !ret->root || *ep != '\0') {
164 if (err)
165 *err = ep;
166 strfilter__delete(ret);
167 ret = NULL;
168 }
169
170 return ret;
171}
172
173static bool strfilter_node__compare(struct strfilter_node *self,
174 const char *str)
175{
176 if (!self || !self->p)
177 return false;
178
179 switch (*self->p) {
180 case '|': /* OR */
181 return strfilter_node__compare(self->l, str) ||
182 strfilter_node__compare(self->r, str);
183 case '&': /* AND */
184 return strfilter_node__compare(self->l, str) &&
185 strfilter_node__compare(self->r, str);
186 case '!': /* NOT */
187 return !strfilter_node__compare(self->r, str);
188 default:
189 return strglobmatch(str, self->p);
190 }
191}
192
193/* Return true if STR matches the filter rules */
194bool strfilter__compare(struct strfilter *self, const char *str)
195{
196 if (!self)
197 return false;
198 return strfilter_node__compare(self->root, str);
199}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
new file mode 100644
index 000000000000..00f58a7506de
--- /dev/null
+++ b/tools/perf/util/strfilter.h
@@ -0,0 +1,48 @@
1#ifndef __PERF_STRFILTER_H
2#define __PERF_STRFILTER_H
3/* General purpose glob matching filter */
4
5#include <linux/list.h>
6#include <stdbool.h>
7
8/* A node of string filter */
9struct strfilter_node {
10 struct strfilter_node *l; /* Tree left branche (for &,|) */
11 struct strfilter_node *r; /* Tree right branche (for !,&,|) */
12 const char *p; /* Operator or rule */
13};
14
15/* String filter */
16struct strfilter {
17 struct strfilter_node *root;
18};
19
20/**
21 * strfilter__new - Create a new string filter
22 * @rules: Filter rule, which is a combination of glob expressions.
23 * @err: Pointer which points an error detected on @rules
24 *
25 * Parse @rules and return new strfilter. Return NULL if an error detected.
26 * In that case, *@err will indicate where it is detected, and *@err is NULL
27 * if a memory allocation is failed.
28 */
29struct strfilter *strfilter__new(const char *rules, const char **err);
30
31/**
32 * strfilter__compare - compare given string and a string filter
33 * @self: String filter
34 * @str: target string
35 *
36 * Compare @str and @self. Return true if the str match the rule
37 */
38bool strfilter__compare(struct strfilter *self, const char *str);
39
40/**
41 * strfilter__delete - delete a string filter
42 * @self: String filter to delete
43 *
44 * Delete @self.
45 */
46void strfilter__delete(struct strfilter *self);
47
48#endif
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index fb737fe9be91..96c866045d60 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -456,9 +456,9 @@ void svg_legenda(void)
456 return; 456 return;
457 457
458 svg_legenda_box(0, "Running", "sample"); 458 svg_legenda_box(0, "Running", "sample");
459 svg_legenda_box(100, "Idle","rect.c1"); 459 svg_legenda_box(100, "Idle","c1");
460 svg_legenda_box(200, "Deeper Idle", "rect.c3"); 460 svg_legenda_box(200, "Deeper Idle", "c3");
461 svg_legenda_box(350, "Deepest Idle", "rect.c6"); 461 svg_legenda_box(350, "Deepest Idle", "c6");
462 svg_legenda_box(550, "Sleeping", "process2"); 462 svg_legenda_box(550, "Sleeping", "process2");
463 svg_legenda_box(650, "Waiting for cpu", "waiting"); 463 svg_legenda_box(650, "Waiting for cpu", "waiting");
464 svg_legenda_box(800, "Blocked on IO", "blocked"); 464 svg_legenda_box(800, "Blocked on IO", "blocked");
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 7821d0e6866f..00014e32c288 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -207,7 +207,6 @@ struct dso *dso__new(const char *name)
207 dso__set_short_name(self, self->name); 207 dso__set_short_name(self, self->name);
208 for (i = 0; i < MAP__NR_TYPES; ++i) 208 for (i = 0; i < MAP__NR_TYPES; ++i)
209 self->symbols[i] = self->symbol_names[i] = RB_ROOT; 209 self->symbols[i] = self->symbol_names[i] = RB_ROOT;
210 self->slen_calculated = 0;
211 self->origin = DSO__ORIG_NOT_FOUND; 210 self->origin = DSO__ORIG_NOT_FOUND;
212 self->loaded = 0; 211 self->loaded = 0;
213 self->sorted_by_name = 0; 212 self->sorted_by_name = 0;
@@ -1525,8 +1524,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1525 symbol_conf.symfs, self->long_name); 1524 symbol_conf.symfs, self->long_name);
1526 break; 1525 break;
1527 case DSO__ORIG_GUEST_KMODULE: 1526 case DSO__ORIG_GUEST_KMODULE:
1528 if (map->groups && map->groups->machine) 1527 if (map->groups && machine)
1529 root_dir = map->groups->machine->root_dir; 1528 root_dir = machine->root_dir;
1530 else 1529 else
1531 root_dir = ""; 1530 root_dir = "";
1532 snprintf(name, size, "%s%s%s", symbol_conf.symfs, 1531 snprintf(name, size, "%s%s%s", symbol_conf.symfs,
@@ -1836,7 +1835,7 @@ int dso__load_vmlinux(struct dso *self, struct map *map,
1836 int err = -1, fd; 1835 int err = -1, fd;
1837 char symfs_vmlinux[PATH_MAX]; 1836 char symfs_vmlinux[PATH_MAX];
1838 1837
1839 snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s", 1838 snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s",
1840 symbol_conf.symfs, vmlinux); 1839 symbol_conf.symfs, vmlinux);
1841 fd = open(symfs_vmlinux, O_RDONLY); 1840 fd = open(symfs_vmlinux, O_RDONLY);
1842 if (fd < 0) 1841 if (fd < 0)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 670cd1c88f54..4d7ed09fe332 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -132,7 +132,6 @@ struct dso {
132 struct rb_root symbol_names[MAP__NR_TYPES]; 132 struct rb_root symbol_names[MAP__NR_TYPES];
133 enum dso_kernel_type kernel; 133 enum dso_kernel_type kernel;
134 u8 adjust_symbols:1; 134 u8 adjust_symbols:1;
135 u8 slen_calculated:1;
136 u8 has_build_id:1; 135 u8 has_build_id:1;
137 u8 hit:1; 136 u8 hit:1;
138 u8 annotate_warned:1; 137 u8 annotate_warned:1;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 00f4eade2e3e..d5d3b22250f3 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,61 +7,6 @@
7#include "util.h" 7#include "util.h"
8#include "debug.h" 8#include "debug.h"
9 9
10/* Skip "." and ".." directories */
11static int filter(const struct dirent *dir)
12{
13 if (dir->d_name[0] == '.')
14 return 0;
15 else
16 return 1;
17}
18
19struct thread_map *thread_map__new_by_pid(pid_t pid)
20{
21 struct thread_map *threads;
22 char name[256];
23 int items;
24 struct dirent **namelist = NULL;
25 int i;
26
27 sprintf(name, "/proc/%d/task", pid);
28 items = scandir(name, &namelist, filter, NULL);
29 if (items <= 0)
30 return NULL;
31
32 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
33 if (threads != NULL) {
34 for (i = 0; i < items; i++)
35 threads->map[i] = atoi(namelist[i]->d_name);
36 threads->nr = items;
37 }
38
39 for (i=0; i<items; i++)
40 free(namelist[i]);
41 free(namelist);
42
43 return threads;
44}
45
46struct thread_map *thread_map__new_by_tid(pid_t tid)
47{
48 struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
49
50 if (threads != NULL) {
51 threads->map[0] = tid;
52 threads->nr = 1;
53 }
54
55 return threads;
56}
57
58struct thread_map *thread_map__new(pid_t pid, pid_t tid)
59{
60 if (pid != -1)
61 return thread_map__new_by_pid(pid);
62 return thread_map__new_by_tid(tid);
63}
64
65static struct thread *thread__new(pid_t pid) 10static struct thread *thread__new(pid_t pid)
66{ 11{
67 struct thread *self = zalloc(sizeof(*self)); 12 struct thread *self = zalloc(sizeof(*self));
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index d7574101054a..e5f2401c1b5e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,24 +18,10 @@ struct thread {
18 int comm_len; 18 int comm_len;
19}; 19};
20 20
21struct thread_map {
22 int nr;
23 int map[];
24};
25
26struct perf_session; 21struct perf_session;
27 22
28void thread__delete(struct thread *self); 23void thread__delete(struct thread *self);
29 24
30struct thread_map *thread_map__new_by_pid(pid_t pid);
31struct thread_map *thread_map__new_by_tid(pid_t tid);
32struct thread_map *thread_map__new(pid_t pid, pid_t tid);
33
34static inline void thread_map__delete(struct thread_map *threads)
35{
36 free(threads);
37}
38
39int thread__set_comm(struct thread *self, const char *comm); 25int thread__set_comm(struct thread *self, const char *comm);
40int thread__comm_len(struct thread *self); 26int thread__comm_len(struct thread *self);
41struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); 27struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
new file mode 100644
index 000000000000..a5df131b77c3
--- /dev/null
+++ b/tools/perf/util/thread_map.c
@@ -0,0 +1,64 @@
1#include <dirent.h>
2#include <stdlib.h>
3#include <stdio.h>
4#include "thread_map.h"
5
6/* Skip "." and ".." directories */
7static int filter(const struct dirent *dir)
8{
9 if (dir->d_name[0] == '.')
10 return 0;
11 else
12 return 1;
13}
14
15struct thread_map *thread_map__new_by_pid(pid_t pid)
16{
17 struct thread_map *threads;
18 char name[256];
19 int items;
20 struct dirent **namelist = NULL;
21 int i;
22
23 sprintf(name, "/proc/%d/task", pid);
24 items = scandir(name, &namelist, filter, NULL);
25 if (items <= 0)
26 return NULL;
27
28 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
29 if (threads != NULL) {
30 for (i = 0; i < items; i++)
31 threads->map[i] = atoi(namelist[i]->d_name);
32 threads->nr = items;
33 }
34
35 for (i=0; i<items; i++)
36 free(namelist[i]);
37 free(namelist);
38
39 return threads;
40}
41
42struct thread_map *thread_map__new_by_tid(pid_t tid)
43{
44 struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
45
46 if (threads != NULL) {
47 threads->map[0] = tid;
48 threads->nr = 1;
49 }
50
51 return threads;
52}
53
54struct thread_map *thread_map__new(pid_t pid, pid_t tid)
55{
56 if (pid != -1)
57 return thread_map__new_by_pid(pid);
58 return thread_map__new_by_tid(tid);
59}
60
61void thread_map__delete(struct thread_map *threads)
62{
63 free(threads);
64}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
new file mode 100644
index 000000000000..3cb907311409
--- /dev/null
+++ b/tools/perf/util/thread_map.h
@@ -0,0 +1,15 @@
1#ifndef __PERF_THREAD_MAP_H
2#define __PERF_THREAD_MAP_H
3
4#include <sys/types.h>
5
6struct thread_map {
7 int nr;
8 int map[];
9};
10
11struct thread_map *thread_map__new_by_pid(pid_t pid);
12struct thread_map *thread_map__new_by_tid(pid_t tid);
13struct thread_map *thread_map__new(pid_t pid, pid_t tid);
14void thread_map__delete(struct thread_map *threads);
15#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
new file mode 100644
index 000000000000..75cfe4d45119
--- /dev/null
+++ b/tools/perf/util/top.c
@@ -0,0 +1,238 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Refactored from builtin-top.c, see that files for further copyright notes.
5 *
6 * Released under the GPL v2. (and only v2, not any later version)
7 */
8
9#include "cpumap.h"
10#include "event.h"
11#include "evlist.h"
12#include "evsel.h"
13#include "parse-events.h"
14#include "symbol.h"
15#include "top.h"
16#include <inttypes.h>
17
18/*
19 * Ordering weight: count-1 * count-2 * ... / count-n
20 */
21static double sym_weight(const struct sym_entry *sym, struct perf_top *top)
22{
23 double weight = sym->snap_count;
24 int counter;
25
26 if (!top->display_weighted)
27 return weight;
28
29 for (counter = 1; counter < top->evlist->nr_entries - 1; counter++)
30 weight *= sym->count[counter];
31
32 weight /= (sym->count[counter] + 1);
33
34 return weight;
35}
36
37static void perf_top__remove_active_sym(struct perf_top *top, struct sym_entry *syme)
38{
39 pthread_mutex_lock(&top->active_symbols_lock);
40 list_del_init(&syme->node);
41 pthread_mutex_unlock(&top->active_symbols_lock);
42}
43
44static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
45{
46 struct rb_node **p = &tree->rb_node;
47 struct rb_node *parent = NULL;
48 struct sym_entry *iter;
49
50 while (*p != NULL) {
51 parent = *p;
52 iter = rb_entry(parent, struct sym_entry, rb_node);
53
54 if (se->weight > iter->weight)
55 p = &(*p)->rb_left;
56 else
57 p = &(*p)->rb_right;
58 }
59
60 rb_link_node(&se->rb_node, parent, p);
61 rb_insert_color(&se->rb_node, tree);
62}
63
64#define SNPRINTF(buf, size, fmt, args...) \
65({ \
66 size_t r = snprintf(buf, size, fmt, ## args); \
67 r > size ? size : r; \
68})
69
70size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
71{
72 struct perf_evsel *counter;
73 float samples_per_sec = top->samples / top->delay_secs;
74 float ksamples_per_sec = top->kernel_samples / top->delay_secs;
75 float esamples_percent = (100.0 * top->exact_samples) / top->samples;
76 size_t ret = 0;
77
78 if (!perf_guest) {
79 ret = SNPRINTF(bf, size,
80 " PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
81 " exact: %4.1f%% [", samples_per_sec,
82 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
83 samples_per_sec)),
84 esamples_percent);
85 } else {
86 float us_samples_per_sec = top->us_samples / top->delay_secs;
87 float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
88 float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs;
89
90 ret = SNPRINTF(bf, size,
91 " PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
92 " guest kernel:%4.1f%% guest us:%4.1f%%"
93 " exact: %4.1f%% [", samples_per_sec,
94 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
95 samples_per_sec)),
96 100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) /
97 samples_per_sec)),
98 100.0 - (100.0 * ((samples_per_sec -
99 guest_kernel_samples_per_sec) /
100 samples_per_sec)),
101 100.0 - (100.0 * ((samples_per_sec -
102 guest_us_samples_per_sec) /
103 samples_per_sec)),
104 esamples_percent);
105 }
106
107 if (top->evlist->nr_entries == 1 || !top->display_weighted) {
108 struct perf_evsel *first;
109 first = list_entry(top->evlist->entries.next, struct perf_evsel, node);
110 ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
111 (uint64_t)first->attr.sample_period,
112 top->freq ? "Hz" : "");
113 }
114
115 if (!top->display_weighted) {
116 ret += SNPRINTF(bf + ret, size - ret, "%s",
117 event_name(top->sym_evsel));
118 } else {
119 /*
120 * Don't let events eat all the space. Leaving 30 bytes
121 * for the rest should be enough.
122 */
123 size_t last_pos = size - 30;
124
125 list_for_each_entry(counter, &top->evlist->entries, node) {
126 ret += SNPRINTF(bf + ret, size - ret, "%s%s",
127 counter->idx ? "/" : "",
128 event_name(counter));
129 if (ret > last_pos) {
130 sprintf(bf + last_pos - 3, "..");
131 ret = last_pos - 1;
132 break;
133 }
134 }
135 }
136
137 ret += SNPRINTF(bf + ret, size - ret, "], ");
138
139 if (top->target_pid != -1)
140 ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d",
141 top->target_pid);
142 else if (top->target_tid != -1)
143 ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d",
144 top->target_tid);
145 else
146 ret += SNPRINTF(bf + ret, size - ret, " (all");
147
148 if (top->cpu_list)
149 ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
150 top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
151 else {
152 if (top->target_tid != -1)
153 ret += SNPRINTF(bf + ret, size - ret, ")");
154 else
155 ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
156 top->evlist->cpus->nr,
157 top->evlist->cpus->nr > 1 ? "s" : "");
158 }
159
160 return ret;
161}
162
163void perf_top__reset_sample_counters(struct perf_top *top)
164{
165 top->samples = top->us_samples = top->kernel_samples =
166 top->exact_samples = top->guest_kernel_samples =
167 top->guest_us_samples = 0;
168}
169
170float perf_top__decay_samples(struct perf_top *top, struct rb_root *root)
171{
172 struct sym_entry *syme, *n;
173 float sum_ksamples = 0.0;
174 int snap = !top->display_weighted ? top->sym_counter : 0, j;
175
176 /* Sort the active symbols */
177 pthread_mutex_lock(&top->active_symbols_lock);
178 syme = list_entry(top->active_symbols.next, struct sym_entry, node);
179 pthread_mutex_unlock(&top->active_symbols_lock);
180
181 top->rb_entries = 0;
182 list_for_each_entry_safe_from(syme, n, &top->active_symbols, node) {
183 syme->snap_count = syme->count[snap];
184 if (syme->snap_count != 0) {
185
186 if ((top->hide_user_symbols &&
187 syme->origin == PERF_RECORD_MISC_USER) ||
188 (top->hide_kernel_symbols &&
189 syme->origin == PERF_RECORD_MISC_KERNEL)) {
190 perf_top__remove_active_sym(top, syme);
191 continue;
192 }
193 syme->weight = sym_weight(syme, top);
194
195 if ((int)syme->snap_count >= top->count_filter) {
196 rb_insert_active_sym(root, syme);
197 ++top->rb_entries;
198 }
199 sum_ksamples += syme->snap_count;
200
201 for (j = 0; j < top->evlist->nr_entries; j++)
202 syme->count[j] = top->zero ? 0 : syme->count[j] * 7 / 8;
203 } else
204 perf_top__remove_active_sym(top, syme);
205 }
206
207 return sum_ksamples;
208}
209
210/*
211 * Find the longest symbol name that will be displayed
212 */
213void perf_top__find_widths(struct perf_top *top, struct rb_root *root,
214 int *dso_width, int *dso_short_width, int *sym_width)
215{
216 struct rb_node *nd;
217 int printed = 0;
218
219 *sym_width = *dso_width = *dso_short_width = 0;
220
221 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
222 struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
223 struct symbol *sym = sym_entry__symbol(syme);
224
225 if (++printed > top->print_entries ||
226 (int)syme->snap_count < top->count_filter)
227 continue;
228
229 if (syme->map->dso->long_name_len > *dso_width)
230 *dso_width = syme->map->dso->long_name_len;
231
232 if (syme->map->dso->short_name_len > *dso_short_width)
233 *dso_short_width = syme->map->dso->short_name_len;
234
235 if (sym->namelen > *sym_width)
236 *sym_width = sym->namelen;
237 }
238}
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
new file mode 100644
index 000000000000..96d1cb78af01
--- /dev/null
+++ b/tools/perf/util/top.h
@@ -0,0 +1,66 @@
1#ifndef __PERF_TOP_H
2#define __PERF_TOP_H 1
3
4#include "types.h"
5#include "../perf.h"
6#include <stddef.h>
7#include <pthread.h>
8#include <linux/list.h>
9#include <linux/rbtree.h>
10
11struct perf_evlist;
12struct perf_evsel;
13
14struct sym_entry {
15 struct rb_node rb_node;
16 struct list_head node;
17 unsigned long snap_count;
18 double weight;
19 int skip;
20 u8 origin;
21 struct map *map;
22 unsigned long count[0];
23};
24
25static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
26{
27 return ((void *)self) + symbol_conf.priv_size;
28}
29
30struct perf_top {
31 struct perf_evlist *evlist;
32 /*
33 * Symbols will be added here in perf_event__process_sample and will
34 * get out after decayed.
35 */
36 struct list_head active_symbols;
37 pthread_mutex_t active_symbols_lock;
38 pthread_cond_t active_symbols_cond;
39 u64 samples;
40 u64 kernel_samples, us_samples;
41 u64 exact_samples;
42 u64 guest_us_samples, guest_kernel_samples;
43 int print_entries, count_filter, delay_secs;
44 int display_weighted, freq, rb_entries, sym_counter;
45 pid_t target_pid, target_tid;
46 bool hide_kernel_symbols, hide_user_symbols, zero;
47 const char *cpu_list;
48 struct sym_entry *sym_filter_entry;
49 struct perf_evsel *sym_evsel;
50};
51
52size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
53void perf_top__reset_sample_counters(struct perf_top *top);
54float perf_top__decay_samples(struct perf_top *top, struct rb_root *root);
55void perf_top__find_widths(struct perf_top *top, struct rb_root *root,
56 int *dso_width, int *dso_short_width, int *sym_width);
57
58#ifdef NO_NEWT_SUPPORT
59static inline int perf_top__tui_browser(struct perf_top *top __used)
60{
61 return 0;
62}
63#else
64int perf_top__tui_browser(struct perf_top *top);
65#endif
66#endif /* __PERF_TOP_H */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 73a02223c629..d8e622dd738a 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -153,7 +153,7 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)
153 char *next = NULL; 153 char *next = NULL;
154 char *addr_str; 154 char *addr_str;
155 char ch; 155 char ch;
156 int ret; 156 int ret __used;
157 int i; 157 int i;
158 158
159 line = strtok_r(file, "\n", &next); 159 line = strtok_r(file, "\n", &next);
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c
index 8bc010edca25..611219f80680 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/util/ui/browser.c
@@ -1,4 +1,5 @@
1#include "libslang.h" 1#include "libslang.h"
2#include "ui.h"
2#include <linux/compiler.h> 3#include <linux/compiler.h>
3#include <linux/list.h> 4#include <linux/list.h>
4#include <linux/rbtree.h> 5#include <linux/rbtree.h>
@@ -156,6 +157,20 @@ void ui_browser__add_exit_keys(struct ui_browser *self, int keys[])
156 } 157 }
157} 158}
158 159
160void __ui_browser__show_title(struct ui_browser *browser, const char *title)
161{
162 SLsmg_gotorc(0, 0);
163 ui_browser__set_color(browser, NEWT_COLORSET_ROOT);
164 slsmg_write_nstring(title, browser->width);
165}
166
167void ui_browser__show_title(struct ui_browser *browser, const char *title)
168{
169 pthread_mutex_lock(&ui__lock);
170 __ui_browser__show_title(browser, title);
171 pthread_mutex_unlock(&ui__lock);
172}
173
159int ui_browser__show(struct ui_browser *self, const char *title, 174int ui_browser__show(struct ui_browser *self, const char *title,
160 const char *helpline, ...) 175 const char *helpline, ...)
161{ 176{
@@ -178,9 +193,8 @@ int ui_browser__show(struct ui_browser *self, const char *title,
178 if (self->sb == NULL) 193 if (self->sb == NULL)
179 return -1; 194 return -1;
180 195
181 SLsmg_gotorc(0, 0); 196 pthread_mutex_lock(&ui__lock);
182 ui_browser__set_color(self, NEWT_COLORSET_ROOT); 197 __ui_browser__show_title(self, title);
183 slsmg_write_nstring(title, self->width);
184 198
185 ui_browser__add_exit_keys(self, keys); 199 ui_browser__add_exit_keys(self, keys);
186 newtFormAddComponent(self->form, self->sb); 200 newtFormAddComponent(self->form, self->sb);
@@ -188,25 +202,30 @@ int ui_browser__show(struct ui_browser *self, const char *title,
188 va_start(ap, helpline); 202 va_start(ap, helpline);
189 ui_helpline__vpush(helpline, ap); 203 ui_helpline__vpush(helpline, ap);
190 va_end(ap); 204 va_end(ap);
205 pthread_mutex_unlock(&ui__lock);
191 return 0; 206 return 0;
192} 207}
193 208
194void ui_browser__hide(struct ui_browser *self) 209void ui_browser__hide(struct ui_browser *self)
195{ 210{
211 pthread_mutex_lock(&ui__lock);
196 newtFormDestroy(self->form); 212 newtFormDestroy(self->form);
197 self->form = NULL; 213 self->form = NULL;
198 ui_helpline__pop(); 214 ui_helpline__pop();
215 pthread_mutex_unlock(&ui__lock);
199} 216}
200 217
201int ui_browser__refresh(struct ui_browser *self) 218int ui_browser__refresh(struct ui_browser *self)
202{ 219{
203 int row; 220 int row;
204 221
222 pthread_mutex_lock(&ui__lock);
205 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); 223 newtScrollbarSet(self->sb, self->index, self->nr_entries - 1);
206 row = self->refresh(self); 224 row = self->refresh(self);
207 ui_browser__set_color(self, HE_COLORSET_NORMAL); 225 ui_browser__set_color(self, HE_COLORSET_NORMAL);
208 SLsmg_fill_region(self->y + row, self->x, 226 SLsmg_fill_region(self->y + row, self->x,
209 self->height - row, self->width, ' '); 227 self->height - row, self->width, ' ');
228 pthread_mutex_unlock(&ui__lock);
210 229
211 return 0; 230 return 0;
212} 231}
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h
index 0dc7e4da36f5..fc63dda10910 100644
--- a/tools/perf/util/ui/browser.h
+++ b/tools/perf/util/ui/browser.h
@@ -24,7 +24,6 @@ struct ui_browser {
24 u32 nr_entries; 24 u32 nr_entries;
25}; 25};
26 26
27
28void ui_browser__set_color(struct ui_browser *self, int color); 27void ui_browser__set_color(struct ui_browser *self, int color);
29void ui_browser__set_percent_color(struct ui_browser *self, 28void ui_browser__set_percent_color(struct ui_browser *self,
30 double percent, bool current); 29 double percent, bool current);
@@ -35,6 +34,8 @@ void ui_browser__reset_index(struct ui_browser *self);
35void ui_browser__gotorc(struct ui_browser *self, int y, int x); 34void ui_browser__gotorc(struct ui_browser *self, int y, int x);
36void ui_browser__add_exit_key(struct ui_browser *self, int key); 35void ui_browser__add_exit_key(struct ui_browser *self, int key);
37void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]); 36void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]);
37void __ui_browser__show_title(struct ui_browser *browser, const char *title);
38void ui_browser__show_title(struct ui_browser *browser, const char *title);
38int ui_browser__show(struct ui_browser *self, const char *title, 39int ui_browser__show(struct ui_browser *self, const char *title,
39 const char *helpline, ...); 40 const char *helpline, ...);
40void ui_browser__hide(struct ui_browser *self); 41void ui_browser__hide(struct ui_browser *self);
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 82b78f99251b..8c17a8730e4a 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -1,9 +1,12 @@
1#include "../browser.h" 1#include "../browser.h"
2#include "../helpline.h" 2#include "../helpline.h"
3#include "../libslang.h" 3#include "../libslang.h"
4#include "../../annotate.h"
4#include "../../hist.h" 5#include "../../hist.h"
5#include "../../sort.h" 6#include "../../sort.h"
6#include "../../symbol.h" 7#include "../../symbol.h"
8#include "../../annotate.h"
9#include <pthread.h>
7 10
8static void ui__error_window(const char *fmt, ...) 11static void ui__error_window(const char *fmt, ...)
9{ 12{
@@ -42,8 +45,6 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
42 struct objdump_line_rb_node *olrb = objdump_line__rb(ol); 45 struct objdump_line_rb_node *olrb = objdump_line__rb(ol);
43 ui_browser__set_percent_color(self, olrb->percent, current_entry); 46 ui_browser__set_percent_color(self, olrb->percent, current_entry);
44 slsmg_printf(" %7.2f ", olrb->percent); 47 slsmg_printf(" %7.2f ", olrb->percent);
45 if (!current_entry)
46 ui_browser__set_color(self, HE_COLORSET_CODE);
47 } else { 48 } else {
48 ui_browser__set_percent_color(self, 0, current_entry); 49 ui_browser__set_percent_color(self, 0, current_entry);
49 slsmg_write_nstring(" ", 9); 50 slsmg_write_nstring(" ", 9);
@@ -55,35 +56,40 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
55 slsmg_write_nstring(" ", width - 18); 56 slsmg_write_nstring(" ", width - 18);
56 else 57 else
57 slsmg_write_nstring(ol->line, width - 18); 58 slsmg_write_nstring(ol->line, width - 18);
59
60 if (!current_entry)
61 ui_browser__set_color(self, HE_COLORSET_CODE);
58} 62}
59 63
60static double objdump_line__calc_percent(struct objdump_line *self, 64static double objdump_line__calc_percent(struct objdump_line *self,
61 struct list_head *head, 65 struct symbol *sym, int evidx)
62 struct symbol *sym)
63{ 66{
64 double percent = 0.0; 67 double percent = 0.0;
65 68
66 if (self->offset != -1) { 69 if (self->offset != -1) {
67 int len = sym->end - sym->start; 70 int len = sym->end - sym->start;
68 unsigned int hits = 0; 71 unsigned int hits = 0;
69 struct sym_priv *priv = symbol__priv(sym); 72 struct annotation *notes = symbol__annotation(sym);
70 struct sym_ext *sym_ext = priv->ext; 73 struct source_line *src_line = notes->src->lines;
71 struct sym_hist *h = priv->hist; 74 struct sym_hist *h = annotation__histogram(notes, evidx);
72 s64 offset = self->offset; 75 s64 offset = self->offset;
73 struct objdump_line *next = objdump__get_next_ip_line(head, self); 76 struct objdump_line *next;
74
75 77
78 next = objdump__get_next_ip_line(&notes->src->source, self);
76 while (offset < (s64)len && 79 while (offset < (s64)len &&
77 (next == NULL || offset < next->offset)) { 80 (next == NULL || offset < next->offset)) {
78 if (sym_ext) { 81 if (src_line) {
79 percent += sym_ext[offset].percent; 82 percent += src_line[offset].percent;
80 } else 83 } else
81 hits += h->ip[offset]; 84 hits += h->addr[offset];
82 85
83 ++offset; 86 ++offset;
84 } 87 }
85 88 /*
86 if (sym_ext == NULL && h->sum) 89 * If the percentage wasn't already calculated in
90 * symbol__get_source_line, do it now:
91 */
92 if (src_line == NULL && h->sum)
87 percent = 100.0 * hits / h->sum; 93 percent = 100.0 * hits / h->sum;
88 } 94 }
89 95
@@ -133,103 +139,161 @@ static void annotate_browser__set_top(struct annotate_browser *self,
133 self->curr_hot = nd; 139 self->curr_hot = nd;
134} 140}
135 141
136static int annotate_browser__run(struct annotate_browser *self) 142static void annotate_browser__calc_percent(struct annotate_browser *browser,
143 int evidx)
137{ 144{
138 struct rb_node *nd; 145 struct symbol *sym = browser->b.priv;
139 struct hist_entry *he = self->b.priv; 146 struct annotation *notes = symbol__annotation(sym);
140 int key; 147 struct objdump_line *pos;
141 148
142 if (ui_browser__show(&self->b, he->ms.sym->name, 149 browser->entries = RB_ROOT;
143 "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) 150
144 return -1; 151 pthread_mutex_lock(&notes->lock);
152
153 list_for_each_entry(pos, &notes->src->source, node) {
154 struct objdump_line_rb_node *rbpos = objdump_line__rb(pos);
155 rbpos->percent = objdump_line__calc_percent(pos, sym, evidx);
156 if (rbpos->percent < 0.01) {
157 RB_CLEAR_NODE(&rbpos->rb_node);
158 continue;
159 }
160 objdump__insert_line(&browser->entries, rbpos);
161 }
162 pthread_mutex_unlock(&notes->lock);
163
164 browser->curr_hot = rb_last(&browser->entries);
165}
166
167static int annotate_browser__run(struct annotate_browser *self, int evidx,
168 int refresh)
169{
170 struct rb_node *nd = NULL;
171 struct symbol *sym = self->b.priv;
145 /* 172 /*
146 * To allow builtin-annotate to cycle thru multiple symbols by 173 * RIGHT To allow builtin-annotate to cycle thru multiple symbols by
147 * examining the exit key for this function. 174 * examining the exit key for this function.
148 */ 175 */
149 ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT); 176 int exit_keys[] = { 'H', NEWT_KEY_TAB, NEWT_KEY_UNTAB,
177 NEWT_KEY_RIGHT, 0 };
178 int key;
179
180 if (ui_browser__show(&self->b, sym->name,
181 "<-, -> or ESC: exit, TAB/shift+TAB: "
182 "cycle hottest lines, H: Hottest") < 0)
183 return -1;
184
185 ui_browser__add_exit_keys(&self->b, exit_keys);
186 annotate_browser__calc_percent(self, evidx);
187
188 if (self->curr_hot)
189 annotate_browser__set_top(self, self->curr_hot);
150 190
151 nd = self->curr_hot; 191 nd = self->curr_hot;
152 if (nd) { 192
153 int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 }; 193 if (refresh != 0)
154 ui_browser__add_exit_keys(&self->b, tabs); 194 newtFormSetTimer(self->b.form, refresh);
155 }
156 195
157 while (1) { 196 while (1) {
158 key = ui_browser__run(&self->b); 197 key = ui_browser__run(&self->b);
159 198
199 if (refresh != 0) {
200 annotate_browser__calc_percent(self, evidx);
201 /*
202 * Current line focus got out of the list of most active
203 * lines, NULL it so that if TAB|UNTAB is pressed, we
204 * move to curr_hot (current hottest line).
205 */
206 if (nd != NULL && RB_EMPTY_NODE(nd))
207 nd = NULL;
208 }
209
160 switch (key) { 210 switch (key) {
211 case -1:
212 /*
213 * FIXME we need to check if it was
214 * es.reason == NEWT_EXIT_TIMER
215 */
216 if (refresh != 0)
217 symbol__annotate_decay_histogram(sym, evidx);
218 continue;
161 case NEWT_KEY_TAB: 219 case NEWT_KEY_TAB:
162 nd = rb_prev(nd); 220 if (nd != NULL) {
163 if (nd == NULL) 221 nd = rb_prev(nd);
164 nd = rb_last(&self->entries); 222 if (nd == NULL)
165 annotate_browser__set_top(self, nd); 223 nd = rb_last(&self->entries);
224 } else
225 nd = self->curr_hot;
166 break; 226 break;
167 case NEWT_KEY_UNTAB: 227 case NEWT_KEY_UNTAB:
168 nd = rb_next(nd); 228 if (nd != NULL)
169 if (nd == NULL) 229 nd = rb_next(nd);
170 nd = rb_first(&self->entries); 230 if (nd == NULL)
171 annotate_browser__set_top(self, nd); 231 nd = rb_first(&self->entries);
232 else
233 nd = self->curr_hot;
234 break;
235 case 'H':
236 nd = self->curr_hot;
172 break; 237 break;
173 default: 238 default:
174 goto out; 239 goto out;
175 } 240 }
241
242 if (nd != NULL)
243 annotate_browser__set_top(self, nd);
176 } 244 }
177out: 245out:
178 ui_browser__hide(&self->b); 246 ui_browser__hide(&self->b);
179 return key; 247 return key;
180} 248}
181 249
182int hist_entry__tui_annotate(struct hist_entry *self) 250int hist_entry__tui_annotate(struct hist_entry *he, int evidx)
251{
252 return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, 0);
253}
254
255int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
256 int refresh)
183{ 257{
184 struct objdump_line *pos, *n; 258 struct objdump_line *pos, *n;
185 struct objdump_line_rb_node *rbpos; 259 struct annotation *notes = symbol__annotation(sym);
186 LIST_HEAD(head);
187 struct annotate_browser browser = { 260 struct annotate_browser browser = {
188 .b = { 261 .b = {
189 .entries = &head, 262 .entries = &notes->src->source,
190 .refresh = ui_browser__list_head_refresh, 263 .refresh = ui_browser__list_head_refresh,
191 .seek = ui_browser__list_head_seek, 264 .seek = ui_browser__list_head_seek,
192 .write = annotate_browser__write, 265 .write = annotate_browser__write,
193 .priv = self, 266 .priv = sym,
194 }, 267 },
195 }; 268 };
196 int ret; 269 int ret;
197 270
198 if (self->ms.sym == NULL) 271 if (sym == NULL)
199 return -1; 272 return -1;
200 273
201 if (self->ms.map->dso->annotate_warned) 274 if (map->dso->annotate_warned)
202 return -1; 275 return -1;
203 276
204 if (hist_entry__annotate(self, &head, sizeof(*rbpos)) < 0) { 277 if (symbol__annotate(sym, map, sizeof(struct objdump_line_rb_node)) < 0) {
205 ui__error_window(ui_helpline__last_msg); 278 ui__error_window(ui_helpline__last_msg);
206 return -1; 279 return -1;
207 } 280 }
208 281
209 ui_helpline__push("Press <- or ESC to exit"); 282 ui_helpline__push("Press <- or ESC to exit");
210 283
211 list_for_each_entry(pos, &head, node) { 284 list_for_each_entry(pos, &notes->src->source, node) {
285 struct objdump_line_rb_node *rbpos;
212 size_t line_len = strlen(pos->line); 286 size_t line_len = strlen(pos->line);
287
213 if (browser.b.width < line_len) 288 if (browser.b.width < line_len)
214 browser.b.width = line_len; 289 browser.b.width = line_len;
215 rbpos = objdump_line__rb(pos); 290 rbpos = objdump_line__rb(pos);
216 rbpos->idx = browser.b.nr_entries++; 291 rbpos->idx = browser.b.nr_entries++;
217 rbpos->percent = objdump_line__calc_percent(pos, &head, self->ms.sym);
218 if (rbpos->percent < 0.01)
219 continue;
220 objdump__insert_line(&browser.entries, rbpos);
221 } 292 }
222 293
223 /*
224 * Position the browser at the hottest line.
225 */
226 browser.curr_hot = rb_last(&browser.entries);
227 if (browser.curr_hot)
228 annotate_browser__set_top(&browser, browser.curr_hot);
229
230 browser.b.width += 18; /* Percentage */ 294 browser.b.width += 18; /* Percentage */
231 ret = annotate_browser__run(&browser); 295 ret = annotate_browser__run(&browser, evidx, refresh);
232 list_for_each_entry_safe(pos, n, &head, node) { 296 list_for_each_entry_safe(pos, n, &notes->src->source, node) {
233 list_del(&pos->node); 297 list_del(&pos->node);
234 objdump_line__free(pos); 298 objdump_line__free(pos);
235 } 299 }
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 60c463c16028..798efdca3ead 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -7,6 +7,8 @@
7#include <newt.h> 7#include <newt.h>
8#include <linux/rbtree.h> 8#include <linux/rbtree.h>
9 9
10#include "../../evsel.h"
11#include "../../evlist.h"
10#include "../../hist.h" 12#include "../../hist.h"
11#include "../../pstack.h" 13#include "../../pstack.h"
12#include "../../sort.h" 14#include "../../sort.h"
@@ -292,7 +294,8 @@ static int hist_browser__run(struct hist_browser *self, const char *title)
292{ 294{
293 int key; 295 int key;
294 int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', 296 int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't',
295 NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, }; 297 NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT,
298 NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0, };
296 299
297 self->b.entries = &self->hists->entries; 300 self->b.entries = &self->hists->entries;
298 self->b.nr_entries = self->hists->nr_entries; 301 self->b.nr_entries = self->hists->nr_entries;
@@ -377,7 +380,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
377 while (node) { 380 while (node) {
378 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); 381 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
379 struct rb_node *next = rb_next(node); 382 struct rb_node *next = rb_next(node);
380 u64 cumul = cumul_hits(child); 383 u64 cumul = callchain_cumul_hits(child);
381 struct callchain_list *chain; 384 struct callchain_list *chain;
382 char folded_sign = ' '; 385 char folded_sign = ' ';
383 int first = true; 386 int first = true;
@@ -638,6 +641,9 @@ static void ui_browser__hists_seek(struct ui_browser *self,
638 struct rb_node *nd; 641 struct rb_node *nd;
639 bool first = true; 642 bool first = true;
640 643
644 if (self->nr_entries == 0)
645 return;
646
641 switch (whence) { 647 switch (whence) {
642 case SEEK_SET: 648 case SEEK_SET:
643 nd = hists__filter_entries(rb_first(self->entries)); 649 nd = hists__filter_entries(rb_first(self->entries));
@@ -797,8 +803,11 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
797 return printed; 803 return printed;
798} 804}
799 805
800int hists__browse(struct hists *self, const char *helpline, const char *ev_name) 806static int perf_evsel__hists_browse(struct perf_evsel *evsel,
807 const char *helpline, const char *ev_name,
808 bool left_exits)
801{ 809{
810 struct hists *self = &evsel->hists;
802 struct hist_browser *browser = hist_browser__new(self); 811 struct hist_browser *browser = hist_browser__new(self);
803 struct pstack *fstack; 812 struct pstack *fstack;
804 const struct thread *thread_filter = NULL; 813 const struct thread *thread_filter = NULL;
@@ -818,8 +827,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
818 hists__browser_title(self, msg, sizeof(msg), ev_name, 827 hists__browser_title(self, msg, sizeof(msg), ev_name,
819 dso_filter, thread_filter); 828 dso_filter, thread_filter);
820 while (1) { 829 while (1) {
821 const struct thread *thread; 830 const struct thread *thread = NULL;
822 const struct dso *dso; 831 const struct dso *dso = NULL;
823 char *options[16]; 832 char *options[16];
824 int nr_options = 0, choice = 0, i, 833 int nr_options = 0, choice = 0, i,
825 annotate = -2, zoom_dso = -2, zoom_thread = -2, 834 annotate = -2, zoom_dso = -2, zoom_thread = -2,
@@ -827,8 +836,10 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
827 836
828 key = hist_browser__run(browser, msg); 837 key = hist_browser__run(browser, msg);
829 838
830 thread = hist_browser__selected_thread(browser); 839 if (browser->he_selection != NULL) {
831 dso = browser->selection->map ? browser->selection->map->dso : NULL; 840 thread = hist_browser__selected_thread(browser);
841 dso = browser->selection->map ? browser->selection->map->dso : NULL;
842 }
832 843
833 switch (key) { 844 switch (key) {
834 case NEWT_KEY_TAB: 845 case NEWT_KEY_TAB:
@@ -839,7 +850,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
839 */ 850 */
840 goto out_free_stack; 851 goto out_free_stack;
841 case 'a': 852 case 'a':
842 if (browser->selection->map == NULL && 853 if (browser->selection == NULL ||
854 browser->selection->map == NULL ||
843 browser->selection->map->dso->annotate_warned) 855 browser->selection->map->dso->annotate_warned)
844 continue; 856 continue;
845 goto do_annotate; 857 goto do_annotate;
@@ -858,6 +870,7 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
858 "E Expand all callchains\n" 870 "E Expand all callchains\n"
859 "d Zoom into current DSO\n" 871 "d Zoom into current DSO\n"
860 "t Zoom into current Thread\n" 872 "t Zoom into current Thread\n"
873 "TAB/UNTAB Switch events\n"
861 "q/CTRL+C Exit browser"); 874 "q/CTRL+C Exit browser");
862 continue; 875 continue;
863 case NEWT_KEY_ENTER: 876 case NEWT_KEY_ENTER:
@@ -867,8 +880,14 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
867 case NEWT_KEY_LEFT: { 880 case NEWT_KEY_LEFT: {
868 const void *top; 881 const void *top;
869 882
870 if (pstack__empty(fstack)) 883 if (pstack__empty(fstack)) {
884 /*
885 * Go back to the perf_evsel_menu__run or other user
886 */
887 if (left_exits)
888 goto out_free_stack;
871 continue; 889 continue;
890 }
872 top = pstack__pop(fstack); 891 top = pstack__pop(fstack);
873 if (top == &dso_filter) 892 if (top == &dso_filter)
874 goto zoom_out_dso; 893 goto zoom_out_dso;
@@ -877,14 +896,16 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
877 continue; 896 continue;
878 } 897 }
879 case NEWT_KEY_ESCAPE: 898 case NEWT_KEY_ESCAPE:
880 if (!ui__dialog_yesno("Do you really want to exit?")) 899 if (!left_exits &&
900 !ui__dialog_yesno("Do you really want to exit?"))
881 continue; 901 continue;
882 /* Fall thru */ 902 /* Fall thru */
883 default: 903 default:
884 goto out_free_stack; 904 goto out_free_stack;
885 } 905 }
886 906
887 if (browser->selection->sym != NULL && 907 if (browser->selection != NULL &&
908 browser->selection->sym != NULL &&
888 !browser->selection->map->dso->annotate_warned && 909 !browser->selection->map->dso->annotate_warned &&
889 asprintf(&options[nr_options], "Annotate %s", 910 asprintf(&options[nr_options], "Annotate %s",
890 browser->selection->sym->name) > 0) 911 browser->selection->sym->name) > 0)
@@ -903,7 +924,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
903 (dso->kernel ? "the Kernel" : dso->short_name)) > 0) 924 (dso->kernel ? "the Kernel" : dso->short_name)) > 0)
904 zoom_dso = nr_options++; 925 zoom_dso = nr_options++;
905 926
906 if (browser->selection->map != NULL && 927 if (browser->selection != NULL &&
928 browser->selection->map != NULL &&
907 asprintf(&options[nr_options], "Browse map details") > 0) 929 asprintf(&options[nr_options], "Browse map details") > 0)
908 browse_map = nr_options++; 930 browse_map = nr_options++;
909 931
@@ -923,19 +945,11 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
923 if (choice == annotate) { 945 if (choice == annotate) {
924 struct hist_entry *he; 946 struct hist_entry *he;
925do_annotate: 947do_annotate:
926 if (browser->selection->map->dso->origin == DSO__ORIG_KERNEL) {
927 browser->selection->map->dso->annotate_warned = 1;
928 ui_helpline__puts("No vmlinux file found, can't "
929 "annotate with just a "
930 "kallsyms file");
931 continue;
932 }
933
934 he = hist_browser__selected_entry(browser); 948 he = hist_browser__selected_entry(browser);
935 if (he == NULL) 949 if (he == NULL)
936 continue; 950 continue;
937 951
938 hist_entry__tui_annotate(he); 952 hist_entry__tui_annotate(he, evsel->idx);
939 } else if (choice == browse_map) 953 } else if (choice == browse_map)
940 map__browse(browser->selection->map); 954 map__browse(browser->selection->map);
941 else if (choice == zoom_dso) { 955 else if (choice == zoom_dso) {
@@ -984,30 +998,141 @@ out:
984 return key; 998 return key;
985} 999}
986 1000
987int hists__tui_browse_tree(struct rb_root *self, const char *help) 1001struct perf_evsel_menu {
1002 struct ui_browser b;
1003 struct perf_evsel *selection;
1004};
1005
1006static void perf_evsel_menu__write(struct ui_browser *browser,
1007 void *entry, int row)
1008{
1009 struct perf_evsel_menu *menu = container_of(browser,
1010 struct perf_evsel_menu, b);
1011 struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
1012 bool current_entry = ui_browser__is_current_entry(browser, row);
1013 unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE];
1014 const char *ev_name = event_name(evsel);
1015 char bf[256], unit;
1016
1017 ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
1018 HE_COLORSET_NORMAL);
1019
1020 nr_events = convert_unit(nr_events, &unit);
1021 snprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
1022 unit, unit == ' ' ? "" : " ", ev_name);
1023 slsmg_write_nstring(bf, browser->width);
1024
1025 if (current_entry)
1026 menu->selection = evsel;
1027}
1028
1029static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help)
988{ 1030{
989 struct rb_node *first = rb_first(self), *nd = first, *next; 1031 int exit_keys[] = { NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, };
990 int key = 0; 1032 struct perf_evlist *evlist = menu->b.priv;
1033 struct perf_evsel *pos;
1034 const char *ev_name, *title = "Available samples";
1035 int key;
1036
1037 if (ui_browser__show(&menu->b, title,
1038 "ESC: exit, ENTER|->: Browse histograms") < 0)
1039 return -1;
1040
1041 ui_browser__add_exit_keys(&menu->b, exit_keys);
991 1042
992 while (nd) { 1043 while (1) {
993 struct hists *hists = rb_entry(nd, struct hists, rb_node); 1044 key = ui_browser__run(&menu->b);
994 const char *ev_name = __event_name(hists->type, hists->config);
995 1045
996 key = hists__browse(hists, help, ev_name);
997 switch (key) { 1046 switch (key) {
998 case NEWT_KEY_TAB: 1047 case NEWT_KEY_RIGHT:
999 next = rb_next(nd); 1048 case NEWT_KEY_ENTER:
1000 if (next) 1049 if (!menu->selection)
1001 nd = next; 1050 continue;
1051 pos = menu->selection;
1052browse_hists:
1053 ev_name = event_name(pos);
1054 key = perf_evsel__hists_browse(pos, help, ev_name, true);
1055 ui_browser__show_title(&menu->b, title);
1002 break; 1056 break;
1003 case NEWT_KEY_UNTAB: 1057 case NEWT_KEY_LEFT:
1004 if (nd == first) 1058 continue;
1059 case NEWT_KEY_ESCAPE:
1060 if (!ui__dialog_yesno("Do you really want to exit?"))
1005 continue; 1061 continue;
1006 nd = rb_prev(nd); 1062 /* Fall thru */
1063 default:
1064 goto out;
1065 }
1066
1067 switch (key) {
1068 case NEWT_KEY_TAB:
1069 if (pos->node.next == &evlist->entries)
1070 pos = list_entry(evlist->entries.next, struct perf_evsel, node);
1071 else
1072 pos = list_entry(pos->node.next, struct perf_evsel, node);
1073 goto browse_hists;
1074 case NEWT_KEY_UNTAB:
1075 if (pos->node.prev == &evlist->entries)
1076 pos = list_entry(evlist->entries.prev, struct perf_evsel, node);
1077 else
1078 pos = list_entry(pos->node.prev, struct perf_evsel, node);
1079 goto browse_hists;
1080 case 'q':
1081 case CTRL('c'):
1082 goto out;
1007 default: 1083 default:
1008 return key; 1084 break;
1009 } 1085 }
1010 } 1086 }
1011 1087
1088out:
1089 ui_browser__hide(&menu->b);
1012 return key; 1090 return key;
1013} 1091}
1092
1093static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
1094 const char *help)
1095{
1096 struct perf_evsel *pos;
1097 struct perf_evsel_menu menu = {
1098 .b = {
1099 .entries = &evlist->entries,
1100 .refresh = ui_browser__list_head_refresh,
1101 .seek = ui_browser__list_head_seek,
1102 .write = perf_evsel_menu__write,
1103 .nr_entries = evlist->nr_entries,
1104 .priv = evlist,
1105 },
1106 };
1107
1108 ui_helpline__push("Press ESC to exit");
1109
1110 list_for_each_entry(pos, &evlist->entries, node) {
1111 const char *ev_name = event_name(pos);
1112 size_t line_len = strlen(ev_name) + 7;
1113
1114 if (menu.b.width < line_len)
1115 menu.b.width = line_len;
1116 /*
1117 * Cache the evsel name, tracepoints have a _high_ cost per
1118 * event_name() call.
1119 */
1120 if (pos->name == NULL)
1121 pos->name = strdup(ev_name);
1122 }
1123
1124 return perf_evsel_menu__run(&menu, help);
1125}
1126
1127int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help)
1128{
1129
1130 if (evlist->nr_entries == 1) {
1131 struct perf_evsel *first = list_entry(evlist->entries.next,
1132 struct perf_evsel, node);
1133 const char *ev_name = event_name(first);
1134 return perf_evsel__hists_browse(first, help, ev_name, false);
1135 }
1136
1137 return __perf_evlist__tui_browse_hists(evlist, help);
1138}
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index e5158369106e..8462bffe20bc 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -41,7 +41,7 @@ static int ui_entry__read(const char *title, char *bf, size_t size, int width)
41out_free_form: 41out_free_form:
42 newtPopWindow(); 42 newtPopWindow();
43 newtFormDestroy(form); 43 newtFormDestroy(form);
44 return 0; 44 return err;
45} 45}
46 46
47struct map_browser { 47struct map_browser {
diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c
new file mode 100644
index 000000000000..5a06538532af
--- /dev/null
+++ b/tools/perf/util/ui/browsers/top.c
@@ -0,0 +1,213 @@
1/*
2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
3 *
4 * Parts came from builtin-{top,stat,record}.c, see those files for further
5 * copyright notes.
6 *
7 * Released under the GPL v2. (and only v2, not any later version)
8 */
9#include "../browser.h"
10#include "../../annotate.h"
11#include "../helpline.h"
12#include "../libslang.h"
13#include "../util.h"
14#include "../../evlist.h"
15#include "../../hist.h"
16#include "../../sort.h"
17#include "../../symbol.h"
18#include "../../top.h"
19
20struct perf_top_browser {
21 struct ui_browser b;
22 struct rb_root root;
23 struct sym_entry *selection;
24 float sum_ksamples;
25 int dso_width;
26 int dso_short_width;
27 int sym_width;
28};
29
30static void perf_top_browser__write(struct ui_browser *browser, void *entry, int row)
31{
32 struct perf_top_browser *top_browser = container_of(browser, struct perf_top_browser, b);
33 struct sym_entry *syme = rb_entry(entry, struct sym_entry, rb_node);
34 bool current_entry = ui_browser__is_current_entry(browser, row);
35 struct symbol *symbol = sym_entry__symbol(syme);
36 struct perf_top *top = browser->priv;
37 int width = browser->width;
38 double pcnt;
39
40 pcnt = 100.0 - (100.0 * ((top_browser->sum_ksamples - syme->snap_count) /
41 top_browser->sum_ksamples));
42 ui_browser__set_percent_color(browser, pcnt, current_entry);
43
44 if (top->evlist->nr_entries == 1 || !top->display_weighted) {
45 slsmg_printf("%20.2f ", syme->weight);
46 width -= 24;
47 } else {
48 slsmg_printf("%9.1f %10ld ", syme->weight, syme->snap_count);
49 width -= 23;
50 }
51
52 slsmg_printf("%4.1f%%", pcnt);
53 width -= 7;
54
55 if (verbose) {
56 slsmg_printf(" %016" PRIx64, symbol->start);
57 width -= 17;
58 }
59
60 slsmg_printf(" %-*.*s ", top_browser->sym_width, top_browser->sym_width,
61 symbol->name);
62 width -= top_browser->sym_width;
63 slsmg_write_nstring(width >= syme->map->dso->long_name_len ?
64 syme->map->dso->long_name :
65 syme->map->dso->short_name, width);
66
67 if (current_entry)
68 top_browser->selection = syme;
69}
70
71static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser)
72{
73 struct perf_top *top = browser->b.priv;
74 u64 top_idx = browser->b.top_idx;
75
76 browser->root = RB_ROOT;
77 browser->b.top = NULL;
78 browser->sum_ksamples = perf_top__decay_samples(top, &browser->root);
79 /*
80 * No active symbols
81 */
82 if (top->rb_entries == 0)
83 return;
84
85 perf_top__find_widths(top, &browser->root, &browser->dso_width,
86 &browser->dso_short_width,
87 &browser->sym_width);
88 if (browser->sym_width + browser->dso_width > browser->b.width - 29) {
89 browser->dso_width = browser->dso_short_width;
90 if (browser->sym_width + browser->dso_width > browser->b.width - 29)
91 browser->sym_width = browser->b.width - browser->dso_width - 29;
92 }
93
94 /*
95 * Adjust the ui_browser indexes since the entries in the browser->root
96 * rb_tree may have changed, then seek it from start, so that we get a
97 * possible new top of the screen.
98 */
99 browser->b.nr_entries = top->rb_entries;
100
101 if (top_idx >= browser->b.nr_entries) {
102 if (browser->b.height >= browser->b.nr_entries)
103 top_idx = browser->b.nr_entries - browser->b.height;
104 else
105 top_idx = 0;
106 }
107
108 if (browser->b.index >= top_idx + browser->b.height)
109 browser->b.index = top_idx + browser->b.index - browser->b.top_idx;
110
111 if (browser->b.index >= browser->b.nr_entries)
112 browser->b.index = browser->b.nr_entries - 1;
113
114 browser->b.top_idx = top_idx;
115 browser->b.seek(&browser->b, top_idx, SEEK_SET);
116}
117
118static void perf_top_browser__annotate(struct perf_top_browser *browser)
119{
120 struct sym_entry *syme = browser->selection;
121 struct symbol *sym = sym_entry__symbol(syme);
122 struct annotation *notes = symbol__annotation(sym);
123 struct perf_top *top = browser->b.priv;
124
125 if (notes->src != NULL)
126 goto do_annotation;
127
128 pthread_mutex_lock(&notes->lock);
129
130 top->sym_filter_entry = NULL;
131
132 if (symbol__alloc_hist(sym, top->evlist->nr_entries) < 0) {
133 pr_err("Not enough memory for annotating '%s' symbol!\n",
134 sym->name);
135 pthread_mutex_unlock(&notes->lock);
136 return;
137 }
138
139 top->sym_filter_entry = syme;
140
141 pthread_mutex_unlock(&notes->lock);
142do_annotation:
143 symbol__tui_annotate(sym, syme->map, 0, top->delay_secs * 1000);
144}
145
146static int perf_top_browser__run(struct perf_top_browser *browser)
147{
148 int key;
149 char title[160];
150 struct perf_top *top = browser->b.priv;
151 int delay_msecs = top->delay_secs * 1000;
152 int exit_keys[] = { 'a', NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, };
153
154 perf_top_browser__update_rb_tree(browser);
155 perf_top__header_snprintf(top, title, sizeof(title));
156 perf_top__reset_sample_counters(top);
157
158 if (ui_browser__show(&browser->b, title,
159 "ESC: exit, ENTER|->|a: Live Annotate") < 0)
160 return -1;
161
162 newtFormSetTimer(browser->b.form, delay_msecs);
163 ui_browser__add_exit_keys(&browser->b, exit_keys);
164
165 while (1) {
166 key = ui_browser__run(&browser->b);
167
168 switch (key) {
169 case -1:
170 /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */
171 perf_top_browser__update_rb_tree(browser);
172 perf_top__header_snprintf(top, title, sizeof(title));
173 perf_top__reset_sample_counters(top);
174 ui_browser__set_color(&browser->b, NEWT_COLORSET_ROOT);
175 SLsmg_gotorc(0, 0);
176 slsmg_write_nstring(title, browser->b.width);
177 break;
178 case 'a':
179 case NEWT_KEY_RIGHT:
180 case NEWT_KEY_ENTER:
181 if (browser->selection)
182 perf_top_browser__annotate(browser);
183 break;
184 case NEWT_KEY_LEFT:
185 continue;
186 case NEWT_KEY_ESCAPE:
187 if (!ui__dialog_yesno("Do you really want to exit?"))
188 continue;
189 /* Fall thru */
190 default:
191 goto out;
192 }
193 }
194out:
195 ui_browser__hide(&browser->b);
196 return key;
197}
198
199int perf_top__tui_browser(struct perf_top *top)
200{
201 struct perf_top_browser browser = {
202 .b = {
203 .entries = &browser.root,
204 .refresh = ui_browser__rb_tree_refresh,
205 .seek = ui_browser__rb_tree_seek,
206 .write = perf_top_browser__write,
207 .priv = top,
208 },
209 };
210
211 ui_helpline__push("Press <- or ESC to exit");
212 return perf_top_browser__run(&browser);
213}
diff --git a/tools/perf/util/ui/helpline.c b/tools/perf/util/ui/helpline.c
index 8d79daa4458a..f36d2ff509ed 100644
--- a/tools/perf/util/ui/helpline.c
+++ b/tools/perf/util/ui/helpline.c
@@ -5,6 +5,7 @@
5 5
6#include "../debug.h" 6#include "../debug.h"
7#include "helpline.h" 7#include "helpline.h"
8#include "ui.h"
8 9
9void ui_helpline__pop(void) 10void ui_helpline__pop(void)
10{ 11{
@@ -55,7 +56,8 @@ int ui_helpline__show_help(const char *format, va_list ap)
55 int ret; 56 int ret;
56 static int backlog; 57 static int backlog;
57 58
58 ret = vsnprintf(ui_helpline__last_msg + backlog, 59 pthread_mutex_lock(&ui__lock);
60 ret = vsnprintf(ui_helpline__last_msg + backlog,
59 sizeof(ui_helpline__last_msg) - backlog, format, ap); 61 sizeof(ui_helpline__last_msg) - backlog, format, ap);
60 backlog += ret; 62 backlog += ret;
61 63
@@ -64,6 +66,7 @@ int ui_helpline__show_help(const char *format, va_list ap)
64 newtRefresh(); 66 newtRefresh();
65 backlog = 0; 67 backlog = 0;
66 } 68 }
69 pthread_mutex_unlock(&ui__lock);
67 70
68 return ret; 71 return ret;
69} 72}
diff --git a/tools/perf/util/ui/libslang.h b/tools/perf/util/ui/libslang.h
index 5623da8e8080..2b63e1c9b181 100644
--- a/tools/perf/util/ui/libslang.h
+++ b/tools/perf/util/ui/libslang.h
@@ -13,11 +13,11 @@
13 13
14#if SLANG_VERSION < 20104 14#if SLANG_VERSION < 20104
15#define slsmg_printf(msg, args...) \ 15#define slsmg_printf(msg, args...) \
16 SLsmg_printf((char *)msg, ##args) 16 SLsmg_printf((char *)(msg), ##args)
17#define slsmg_write_nstring(msg, len) \ 17#define slsmg_write_nstring(msg, len) \
18 SLsmg_write_nstring((char *)msg, len) 18 SLsmg_write_nstring((char *)(msg), len)
19#define sltt_set_color(obj, name, fg, bg) \ 19#define sltt_set_color(obj, name, fg, bg) \
20 SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg) 20 SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
21#else 21#else
22#define slsmg_printf SLsmg_printf 22#define slsmg_printf SLsmg_printf
23#define slsmg_write_nstring SLsmg_write_nstring 23#define slsmg_write_nstring SLsmg_write_nstring
diff --git a/tools/perf/util/ui/setup.c b/tools/perf/util/ui/setup.c
index 662085032eb7..ee46d671db59 100644
--- a/tools/perf/util/ui/setup.c
+++ b/tools/perf/util/ui/setup.c
@@ -6,6 +6,9 @@
6#include "../debug.h" 6#include "../debug.h"
7#include "browser.h" 7#include "browser.h"
8#include "helpline.h" 8#include "helpline.h"
9#include "ui.h"
10
11pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
9 12
10static void newt_suspend(void *d __used) 13static void newt_suspend(void *d __used)
11{ 14{
@@ -14,11 +17,12 @@ static void newt_suspend(void *d __used)
14 newtResume(); 17 newtResume();
15} 18}
16 19
17void setup_browser(void) 20void setup_browser(bool fallback_to_pager)
18{ 21{
19 if (!isatty(1) || !use_browser || dump_trace) { 22 if (!isatty(1) || !use_browser || dump_trace) {
20 use_browser = 0; 23 use_browser = 0;
21 setup_pager(); 24 if (fallback_to_pager)
25 setup_pager();
22 return; 26 return;
23 } 27 }
24 28
diff --git a/tools/perf/util/ui/ui.h b/tools/perf/util/ui/ui.h
new file mode 100644
index 000000000000..d264e059c829
--- /dev/null
+++ b/tools/perf/util/ui/ui.h
@@ -0,0 +1,8 @@
1#ifndef _PERF_UI_H_
2#define _PERF_UI_H_ 1
3
4#include <pthread.h>
5
6extern pthread_mutex_t ui__lock;
7
8#endif /* _PERF_UI_H_ */
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 7b5a8926624e..fdf1fc8f08bc 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -9,6 +9,7 @@
9#include "../debug.h" 9#include "../debug.h"
10#include "browser.h" 10#include "browser.h"
11#include "helpline.h" 11#include "helpline.h"
12#include "ui.h"
12#include "util.h" 13#include "util.h"
13 14
14static void newt_form__set_exit_keys(newtComponent self) 15static void newt_form__set_exit_keys(newtComponent self)
@@ -118,10 +119,12 @@ void ui__warning(const char *format, ...)
118 va_list args; 119 va_list args;
119 120
120 va_start(args, format); 121 va_start(args, format);
121 if (use_browser > 0) 122 if (use_browser > 0) {
123 pthread_mutex_lock(&ui__lock);
122 newtWinMessagev((char *)warning_str, (char *)ok, 124 newtWinMessagev((char *)warning_str, (char *)ok,
123 (char *)format, args); 125 (char *)format, args);
124 else 126 pthread_mutex_unlock(&ui__lock);
127 } else
125 vfprintf(stderr, format, args); 128 vfprintf(stderr, format, args);
126 va_end(args); 129 va_end(args);
127} 130}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index e833f26f3bfc..fc784284ac8b 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -70,9 +70,7 @@
70#include <sys/poll.h> 70#include <sys/poll.h>
71#include <sys/socket.h> 71#include <sys/socket.h>
72#include <sys/ioctl.h> 72#include <sys/ioctl.h>
73#ifndef NO_SYS_SELECT_H
74#include <sys/select.h> 73#include <sys/select.h>
75#endif
76#include <netinet/in.h> 74#include <netinet/in.h>
77#include <netinet/tcp.h> 75#include <netinet/tcp.h>
78#include <arpa/inet.h> 76#include <arpa/inet.h>
@@ -83,10 +81,6 @@
83#include "types.h" 81#include "types.h"
84#include <sys/ttydefaults.h> 82#include <sys/ttydefaults.h>
85 83
86#ifndef NO_ICONV
87#include <iconv.h>
88#endif
89
90extern const char *graph_line; 84extern const char *graph_line;
91extern const char *graph_dotted_line; 85extern const char *graph_dotted_line;
92extern char buildid_dir[]; 86extern char buildid_dir[];
@@ -236,26 +230,6 @@ static inline int sane_case(int x, int high)
236 return x; 230 return x;
237} 231}
238 232
239#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
240# define FORCE_DIR_SET_GID S_ISGID
241#else
242# define FORCE_DIR_SET_GID 0
243#endif
244
245#ifdef NO_NSEC
246#undef USE_NSEC
247#define ST_CTIME_NSEC(st) 0
248#define ST_MTIME_NSEC(st) 0
249#else
250#ifdef USE_ST_TIMESPEC
251#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
252#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
253#else
254#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
255#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
256#endif
257#endif
258
259int mkdir_p(char *path, mode_t mode); 233int mkdir_p(char *path, mode_t mode);
260int copyfile(const char *from, const char *to); 234int copyfile(const char *from, const char *to);
261 235
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index e1c62eeb88f5..ba7c63af6f3b 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1,6 +1,6 @@
1#!/usr/bin/perl -w 1#!/usr/bin/perl -w
2# 2#
3# Copywrite 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc. 3# Copyright 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
4# Licensed under the terms of the GNU GPL License version 2 4# Licensed under the terms of the GNU GPL License version 2
5# 5#
6 6